diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 46959146..9f77688a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,3 +4,7 @@ updates: directory: "/" schedule: interval: "daily" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml index 9b28abf4..5468e6d0 100644 --- a/.github/workflows/auto-merge.yml +++ b/.github/workflows/auto-merge.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v1.3.3 + uses: dependabot/fetch-metadata@v2.4.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" - name: Enable auto-merge for Dependabot PRs diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index fc02f2fe..02f75a3e 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -5,8 +5,8 @@ on: branches: - main - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages permissions: @@ -25,9 +25,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v5 - name: Setup Pages - uses: actions/configure-pages@v2 + uses: actions/configure-pages@v5 - name: Set up Ruby uses: ruby/setup-ruby@v1 with: @@ -39,7 +39,7 @@ jobs: rdoc --main README.md --op _site --exclude={Gemfile,Rakefile,"coverage/*","vendor/*","bin/*","test/*","tmp/*"} cp -r doc _site/doc - name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@v4 # Deployment job deploy: @@ -51,4 +51,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 
9f95cc9d..468591bd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,7 +1,9 @@ name: Main + on: - push - pull_request + jobs: ci: strategy: @@ -11,13 +13,15 @@ jobs: - '2.7.0' - '3.0' - '3.1' - - head + - '3.2' + - '3.3' + - '3.4' - truffleruby-head name: CI runs-on: ubuntu-latest env: CI: true - TESTOPTS: --verbose + # TESTOPTS: --verbose steps: - uses: actions/checkout@master - uses: ruby/setup-ruby@v1 @@ -37,7 +41,7 @@ jobs: - uses: ruby/setup-ruby@v1 with: bundler-cache: true - ruby-version: '3.1' + ruby-version: '3.2' - name: Check run: | bundle exec rake stree:check diff --git a/.gitignore b/.gitignore index 69755243..3ce1e327 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ /coverage/ /pkg/ /rdocs/ +/sorbet/ /spec/reports/ /tmp/ /vendor/ diff --git a/.rubocop.yml b/.rubocop.yml index daf5a824..1b81a535 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -7,9 +7,12 @@ AllCops: SuggestExtensions: false TargetRubyVersion: 2.7 Exclude: - - '{.git,.github,bin,coverage,pkg,test/fixtures,vendor,tmp}/**/*' + - '{.git,.github,.ruby-lsp,bin,coverage,doc,pkg,sorbet,spec,test/fixtures,vendor,tmp}/**/*' - test.rb +Gemspec/DevelopmentDependencies: + Enabled: false + Layout/LineLength: Max: 80 @@ -25,6 +28,9 @@ Lint/AmbiguousRange: Lint/BooleanSymbol: Enabled: false +Lint/Debugger: + Enabled: false + Lint/DuplicateBranch: Enabled: false @@ -43,6 +49,9 @@ Lint/NonLocalExitFromIterator: Lint/RedundantRequireStatement: Enabled: false +Lint/RescueException: + Enabled: false + Lint/SuppressedException: Enabled: false @@ -73,6 +82,9 @@ Security/Eval: Style/AccessorGrouping: Enabled: false +Style/Alias: + Enabled: false + Style/CaseEquality: Enabled: false @@ -82,6 +94,9 @@ Style/CaseLikeIf: Style/ClassVars: Enabled: false +Style/CombinableLoops: + Enabled: false + Style/DocumentDynamicEvalDefinition: Enabled: false @@ -103,6 +118,9 @@ Style/FormatStringToken: Style/GuardClause: Enabled: false +Style/HashLikeCase: + Enabled: false + 
Style/IdenticalConditionalBranches: Enabled: false @@ -118,6 +136,9 @@ Style/KeywordParametersOrder: Style/MissingRespondToMissing: Enabled: false +Style/MultipleComparison: + Enabled: false + Style/MutableConstant: Enabled: false @@ -136,6 +157,12 @@ Style/ParallelAssignment: Style/PerlBackrefs: Enabled: false +Style/RedundantArrayConstructor: + Enabled: false + +Style/RedundantParentheses: + Enabled: false + Style/SafeNavigation: Enabled: false @@ -144,3 +171,6 @@ Style/SpecialGlobalVars: Style/StructInheritance: Enabled: false + +Style/YodaExpression: + Enabled: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 20808e3b..4ad42fc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,176 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +## [6.3.0] - 2025-07-16 + +### Added + +- The `--extension` command line option has been added to the CLI to specify what type of content is coming from stdin. +- The `--config` command line option has been added to the CLI to specify the path to the configuration file. + +### Changed + +- Fix formatting of character literals when single quotes is enabled. +- Pass ignore files option to the language server. +- Hash keys should remain unchanged when there are any omitted values in the hash. +- We now properly handle compilation errors in the parser. + +## [6.2.0] - 2023-09-20 + +### Added + +- Fix `WithScope` for destructured post arguments. + +### Changed + +- Always use `do`/`end` for multi-line lambdas. + +## [6.1.1] - 2023-03-21 + +### Changed + +- Fixed a bug where the call chain formatter was incorrectly looking at call messages. + +## [6.1.0] - 2023-03-20 + +### Added + +- The `stree ctags` command for generating ctags like `universal-ctags` or `ripper-tags` would. +- The `definedivar` YARV instruction has been added to match CRuby's implementation. +- We now generate better Sorbet RBI files for the nodes in the tree and the visitors. 
+- `SyntaxTree::Reflection.nodes` now includes the visitor method. + +### Changed + +- We now explicitly require `pp` in environments that need it. + +## [6.0.2] - 2023-03-03 + +### Added + +- The `WithScope` visitor mixin will now additionally report local variables defined through regular expression named captures. +- The `WithScope` visitor mixin now properly handles destructured splat arguments in required positions. + +### Changed + +- Fixed the AST output by adding blocks to `Command` and `CommandCall` nodes in the `FieldVisitor`. +- Fixed the location of lambda local variables (e.g., `->(; a) {}`). + +## [6.0.1] - 2023-02-26 + +### Added + +- The class declarations returned as the result of the indexing operation now have their superclass as a field. It is returned as an array of constants. If the superclass is anything other than a constant lookup, then it raises an error. + +### Changed + +- The `nesting` field on the results of the indexing operation is no longer a single flat array. Instead it is an array of arrays, where each array is a single nesting level. This more accurately reflects the nesting of the nodes in the tree. For example, `class Foo::Bar::Baz; end` would result in `[Foo, Bar, Baz]`, but that incorrectly implies that you can see constants at each of those levels. Now this would result in `[[Foo, Bar, Baz]]` to indicate that it can see either the top level or constants within the scope of `Foo::Bar::Baz` only. +- When formatting hashes that have omitted values and mixed hash rockets with labels, the formatting now maintains whichever delimiter was used in the source. This is because forcing the use of hash rockets with omitted values results in a syntax error. +- Handle the case where a bare hash is used after the `break`, `next`, or `return` keywords. Previously this would result in hash labels which is not valid syntax. Now it maintains the delimiters used in the source. +- The `<<` operator will now break on chained `<<` expressions. 
Previously it would always stay flat. + +## [6.0.0] - 2023-02-10 + +### Added + +- `SyntaxTree::BasicVisitor::visit_methods` has been added to allow you to check multiple visit methods inside of a block. There _was_ a method called `visit_methods` previously, but it was undocumented because it was meant as a private API. That method has been renamed to `valid_visit_methods`. +- `rake sorbet:rbi` has been added as a task within the repository to generate an RBI file corresponding to the nodes in the tree. This can be used to help aid consumers of Syntax Tree that are using Sorbet. +- `SyntaxTree::Reflection` has been added to allow you to get information about the nodes in the tree. It is not required by default, since it takes a small amount of time to parse `node.rb` and get all of the information. +- `SyntaxTree::Node#to_mermaid` has been added to allow you to generate a Mermaid diagram of the node and its children. This is useful for debugging and understanding the structure of the tree. +- `SyntaxTree::Translation` has been added as an experimental API to transform the Syntax Tree syntax tree into the syntax trees represented by the whitequark/parser and rubocop/rubocop-ast gems. + - `SyntaxTree::Translation.to_parser(node, buffer)` will return a `Parser::AST::Node` object. + - `SyntaxTree::Translation.to_rubocop_ast(node, buffer)` will return a `RuboCop::AST::Node` object. +- `SyntaxTree::index` and `SyntaxTree::index_file` have been added to allow you to get a list of all of the classes, modules, and methods defined in a given source string or file. 
+- Various convenience methods have been added: + - `SyntaxTree::format_file` - which calls format with the result of reading the file + - `SyntaxTree::format_node` - which formats the node directly + - `SyntaxTree::parse_file` - which calls parse with the result of reading the file + - `SyntaxTree::search_file` - which calls search with the result of reading the file + - `SyntaxTree::Node#start_char` - which is the same as calling `node.location.start_char` + - `SyntaxTree::Node#end_char` - which is the same as calling `node.location.end_char` +- `SyntaxTree::Assoc` nodes can now be formatted on their own without a parent hash node. +- `SyntaxTree::BlockVar#arg0?` has been added to check if a single required block parameter is present and would potentially be expanded. +- More experimental APIs have been added to the `SyntaxTree::YARV` module, including: + - `SyntaxTree::YARV::ControlFlowGraph` + - `SyntaxTree::YARV::DataFlowGraph` + - `SyntaxTree::YARV::SeaOfNodes` + +### Changed + +#### Major changes + +- *BREAKING* Updates to `WithEnvironment`: + - The `WithEnvironment` module has been renamed to `WithScope`. + - The `current_environment` method has been renamed to `current_scope`. + - The `with_current_environment` method has been removed. + - Previously scopes were always able to look up the tree, as in: `a = 1; def foo; a = 2; end` would see only a single `a` variable. That has been corrected. + - Previously accessing variables from inside of blocks that were not shadowed would mark them as being local to the block only. This has been corrected. 
+- *BREAKING* Lots of constants moved out of `SyntaxTree::Visitor` to just `SyntaxTree`: + * `SyntaxTree::Visitor::FieldVisitor` is now `SyntaxTree::FieldVisitor` + * `SyntaxTree::Visitor::JSONVisitor` is now `SyntaxTree::JSONVisitor` + * `SyntaxTree::Visitor::MatchVisitor` is now `SyntaxTree::MatchVisitor` + * `SyntaxTree::Visitor::MutationVisitor` is now `SyntaxTree::MutationVisitor` + * `SyntaxTree::Visitor::PrettyPrintVisitor` is now `SyntaxTree::PrettyPrintVisitor` +- *BREAKING* Lots of constants are now autoloaded instead of required by default. This is only particularly relevant if you are in a forking environment and want to preload constants before forking for better memory usage with copy-on-write. +- *BREAKING* The `SyntaxTree::Statements#initialize` method no longer accepts a parser as the first argument. It now mirrors the other nodes in that it accepts its children and location. As a result, Syntax Tree nodes are now marshalable (and therefore can be sent over DRb). Previously the `Statements` node was not able to be marshaled because it held a reference to the parser. + +#### Minor changes + +- Many places where embedded documents (`=begin` to `=end`) were being treated as real comments have been fixed for formatting. +- Dynamic symbols in keyword pattern matching now have better formatting. +- Endless method definitions used to have a `SyntaxTree::BodyStmt` node that had any kind of node as its `statements` field. That has been corrected to be more consistent such that now going from `def_node.bodystmt.statements` always returns a `SyntaxTree::Statements` node, which is more consistent. +- We no longer assume that `fiddle` is able to be required, and only require it when it is actually needed. + +#### Tiny changes + +- Empty parameter nodes within blocks now have more accurate location information. +- Pinned variables have more correct location information now. 
(Previously the location was just around the variable itself, but it now includes the pin.) +- Array patterns in pattern matching now have more accurate location information when they are using parentheses with a constant present. +- Find patterns in pattern matching now have more correct location information for their `left` and `right` fields. +- Lots of nodes have more correct types in the comments on their attributes. +- The expressions `break foo.bar :baz do |qux| qux end` and `next fun foo do end` now correctly parse as a control-flow statement with a method call that has a block attached, as opposed to a control-flow statement with a block attached. +- The expression `self::a, b = 1, 2` would previously yield a `SyntaxTree::ConstPathField` node for the first element of the left-hand-side of the multiple assignment. Semantically this is incorrect, and we have fixed this to now be a `SyntaxTree::Field` node instead. + +## [5.3.0] - 2023-01-26 + +### Added + +- `#arity` has been added to `DefNode`, `BlockNode`, and `Params`. The method returns a range where the lower bound is the minimum and the upper bound is the maximum number of arguments that can be used to invoke that block/method definition. +- `#arity` has been added to `CallNode`, `Command`, `CommandCall`, and `VCall` nodes. The method returns the number of arguments included in the invocation. For splats, double splats, or argument forwards, this method returns `Float::INFINITY`. +- `SyntaxTree::index` and `SyntaxTree::index_file` APIs have been added to collect a list of classes, modules, and methods defined in a given source string or file, respectively. These APIs are experimental and subject to change. +- A `plugin/disable_auto_ternary` plugin has been added that disables the formatter that automatically changes permissible `if/else` clauses into ternaries. + +### Changed + +- Files are now only written from the CLI if the content of them changes, which should make watching files less chaotic. 
+- In the case that `rb_iseq_load` cannot be found, `Fiddle::DLError` is now rescued. +- Previously if there were invalid UTF-8 byte sequences after the `__END__` keyword the parser could potentially have crashed when parsing comments. This has been fixed. +- Previously there was special formatting for array literals that contained only variable references (either locals, method calls, or constants). For consistency, this has been removed and all array literals are now formatted the same way. + +## [5.2.0] - 2023-01-04 + +### Added + +- An experiment in evaluating compiled instruction sequences has been added to Syntax Tree. This is subject to change, so it will not be well documented or tested at the moment. It does not impact other functionality. + +### Changed + +- Empty parentheses on method calls will now be left in place. Previously they were left in place if the method being called looked like a constant. Now they are left in place for all method calls since the method name can mirror the name of a local variable, in which case the parentheses are required. + +## [5.1.0] - 2022-12-28 + +### Added + +- An experiment in working with instruction sequences has been added to Syntax Tree. This is subject to change, so it is not well documented or tested at the moment. It does not impact other functionality. +- You can now format at a different base layer of indentation. This is an optional third argument to `SyntaxTree::format`. + ### Changed - Support forwarding anonymous keyword arguments with `**`. +- The `BodyStmt` node now has more correct location information. +- Ignore the `textDocument/documentColor` request coming into the language server to support clients that require that request be received. +- Do not attempt to convert `if..else` into ternaries if the predicate has a `Binary` node. +- Properly handle nested pattern matching when a rightward assignment is inside a `when` clause. 
## [5.0.1] - 2022-11-10 @@ -460,7 +627,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - 🎉 Initial release! 🎉 -[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...HEAD +[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.2.0...HEAD +[6.2.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.1.1...v6.2.0 +[6.1.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.1.0...v6.1.1 +[6.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.2...v6.1.0 +[6.0.2]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.1...v6.0.2 +[6.0.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v6.0.0...v6.0.1 +[6.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.3.0...v6.0.0 +[5.3.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.2.0...v5.3.0 +[5.2.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.1.0...v5.2.0 +[5.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...v5.1.0 [5.0.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.0...v5.0.1 [5.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.3.0...v5.0.0 [4.3.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.2.0...v4.3.0 diff --git a/Gemfile b/Gemfile index be173b20..b4252fb5 100644 --- a/Gemfile +++ b/Gemfile @@ -3,3 +3,5 @@ source "https://rubygems.org" gemspec + +gem "fiddle" diff --git a/Gemfile.lock b/Gemfile.lock index cddd3f21..7be345b8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,44 +1,53 @@ PATH remote: . 
specs: - syntax_tree (5.0.1) - prettier_print (>= 1.1.0) + syntax_tree (6.3.0) + prettier_print (>= 1.2.0) GEM remote: https://rubygems.org/ specs: - ast (2.4.2) - docile (1.4.0) - json (2.6.3) - minitest (5.16.3) - parallel (1.22.1) - parser (3.1.3.0) + ast (2.4.3) + docile (1.4.1) + fiddle (1.1.8) + json (2.15.1) + language_server-protocol (3.17.0.5) + lint_roller (1.1.0) + minitest (5.26.0) + parallel (1.27.0) + parser (3.3.9.0) ast (~> 2.4.1) - prettier_print (1.1.0) + racc + prettier_print (1.2.1) + prism (1.6.0) + racc (1.8.1) rainbow (3.1.1) - rake (13.0.6) - regexp_parser (2.6.1) - rexml (3.2.5) - rubocop (1.41.1) + rake (13.3.1) + regexp_parser (2.11.3) + rubocop (1.81.6) json (~> 2.3) + language_server-protocol (~> 3.17.0.2) + lint_roller (~> 1.1.0) parallel (~> 1.10) - parser (>= 3.1.2.1) + parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) - regexp_parser (>= 1.8, < 3.0) - rexml (>= 3.2.5, < 4.0) - rubocop-ast (>= 1.23.0, < 2.0) + regexp_parser (>= 2.9.3, < 3.0) + rubocop-ast (>= 1.47.1, < 2.0) ruby-progressbar (~> 1.7) - unicode-display_width (>= 1.4.0, < 3.0) - rubocop-ast (1.24.0) - parser (>= 3.1.1.0) - ruby-progressbar (1.11.0) - simplecov (0.21.2) + unicode-display_width (>= 2.4.0, < 4.0) + rubocop-ast (1.47.1) + parser (>= 3.3.7.2) + prism (~> 1.4) + ruby-progressbar (1.13.0) + simplecov (0.22.0) docile (~> 1.1) simplecov-html (~> 0.11) simplecov_json_formatter (~> 0.1) - simplecov-html (0.12.3) + simplecov-html (0.13.1) simplecov_json_formatter (0.1.4) - unicode-display_width (2.3.0) + unicode-display_width (3.2.0) + unicode-emoji (~> 4.1) + unicode-emoji (4.1.0) PLATFORMS arm64-darwin-21 @@ -49,6 +58,7 @@ PLATFORMS DEPENDENCIES bundler + fiddle minitest rake rubocop diff --git a/README.md b/README.md index 7a943ca8..c238620e 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ It is built with only standard library dependencies. 
It additionally ships with - [CLI](#cli) - [ast](#ast) - [check](#check) + - [ctags](#ctags) - [expr](#expr) - [format](#format) - [json](#json) @@ -29,6 +30,7 @@ It is built with only standard library dependencies. It additionally ships with - [SyntaxTree.format(source)](#syntaxtreeformatsource) - [SyntaxTree.mutation(&block)](#syntaxtreemutationblock) - [SyntaxTree.search(source, query, &block)](#syntaxtreesearchsource-query-block) + - [SyntaxTree.index(source)](#syntaxtreeindexsource) - [Nodes](#nodes) - [child_nodes](#child_nodes) - [copy(**attrs)](#copyattrs) @@ -40,9 +42,10 @@ It is built with only standard library dependencies. It additionally ships with - [construct_keys](#construct_keys) - [Visitor](#visitor) - [visit_method](#visit_method) + - [visit_methods](#visit_methods) - [BasicVisitor](#basicvisitor) - [MutationVisitor](#mutationvisitor) - - [WithEnvironment](#withenvironment) + - [WithScope](#withscope) - [Language server](#language-server) - [textDocument/formatting](#textdocumentformatting) - [textDocument/inlayHint](#textdocumentinlayhint) @@ -137,6 +140,33 @@ To change the print width that you are checking against, specify the `--print-wi stree check --print-width=100 path/to/file.rb ``` +### ctags + +This command will output to stdout a set of tags suitable for usage with [ctags](https://github.com/universal-ctags/ctags). + +```sh +stree ctags path/to/file.rb +``` + +For a file containing the following Ruby code: + +```ruby +class Foo +end + +class Bar < Foo +end +``` + +you will receive: + +``` +!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ +!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ +Bar test.rb /^class Bar < Foo$/;" c inherits:Foo +Foo test.rb /^class Foo$/;" c +``` + ### expr This command will output a Ruby case-match expression that would match correctly against the first expression of the input. 
@@ -157,7 +187,7 @@ SyntaxTree::Binary[ ### format -This command will output the formatted version of each of the listed files. Importantly, it will not write that content back to the source files. It is meant to display the formatted version only. +This command will output the formatted version of each of the listed files to stdout. Importantly, it will not write that content back to the source files – for that, you want [`write`](#write). ```sh stree format path/to/file.rb @@ -267,7 +297,7 @@ Note that the output of the `match` CLI command creates a valid pattern that can ### write -This command will format the listed files and write that formatted version back to the source files. Note that this overwrites the original content, to be sure to be using a version control system. +This command will format the listed files and write that formatted version back to the source files. Note that this overwrites the original content, so be sure to be using a version control system. ```sh stree write path/to/file.rb @@ -340,12 +370,16 @@ This function takes an input string containing Ruby code, parses it into its und ### SyntaxTree.mutation(&block) -This function yields a new mutation visitor to the block, and then returns the initialized visitor. It's effectively a shortcut for creating a `SyntaxTree::Visitor::MutationVisitor` without having to remember the class name. For more information on that visitor, see the definition below. +This function yields a new mutation visitor to the block, and then returns the initialized visitor. It's effectively a shortcut for creating a `SyntaxTree::MutationVisitor` without having to remember the class name. For more information on that visitor, see the definition below. 
### SyntaxTree.search(source, query, &block) This function takes an input string containing Ruby code, an input string containing a valid Ruby `in` clause expression that can be used to match against nodes in the tree (can be generated using `stree expr`, `stree match`, or `Node#construct_keys`), and a block. Each node that matches the given query will be yielded to the block. The block will receive the node as its only argument. +### SyntaxTree.index(source) + +This function takes an input string containing Ruby code and returns a list of all of the class declarations, module declarations, and method definitions within a file. Each of the entries also has access to its associated comments. This is useful for generating documentation or index information for a file to support something like go-to-definition. + ## Nodes There are many different node types in the syntax tree. They are meant to be treated as immutable structs containing links to child nodes with minimal logic contained within their implementation. However, for the most part they all respond to a certain set of APIs, listed below. @@ -491,7 +525,7 @@ With visitors, you only define handlers for the nodes that you need. You can fin * call `visit(child)` with each child that you want to visit * call nothing if you're sure you don't want to descend further -There are a couple of visitors that ship with Syntax Tree that can be used as examples. They live in the [lib/syntax_tree/visitor](lib/syntax_tree/visitor) directory. +There are a couple of visitors that ship with Syntax Tree that can be used as examples. They live in the [lib/syntax_tree](lib/syntax_tree) directory. ### visit_method @@ -517,6 +551,26 @@ Did you mean? visit_binary from bin/console:8:in `
' ``` +### visit_methods + +Similar to `visit_method`, `visit_methods` also checks that methods defined are valid visit methods. This variation however accepts a block and checks that all methods defined within that block are valid visit methods. It's meant to be used like: + +```ruby +class ArithmeticVisitor < SyntaxTree::Visitor + visit_methods do + def visit_binary(node) + # ... + end + + def visit_int(node) + # ... + end + end +end +``` + +This is only checked when the methods are defined and does not impose any kind of runtime overhead after that. It is very useful for upgrading versions of Syntax Tree in case these method names change. + ### BasicVisitor When you're defining your own visitor, by default it will walk down the tree even if you don't define `visit_*` methods. This is to ensure you can define a subset of the necessary methods in order to only interact with the nodes you're interested in. If you'd like to change this default to instead raise an error if you visit a node you haven't explicitly handled, you can instead inherit from `BasicVisitor`. @@ -537,7 +591,7 @@ The `MutationVisitor` is a visitor that can be used to mutate the tree. It works ```ruby # Create a new visitor -visitor = SyntaxTree::Visitor::MutationVisitor.new +visitor = SyntaxTree::MutationVisitor.new # Specify that it should mutate If nodes with assignments in their predicates visitor.mutate("IfNode[predicate: Assign | OpAssign]") do |node| @@ -567,20 +621,18 @@ SyntaxTree::Formatter.format(source, program.accept(visitor)) # => "if (a = 1)\nend\n" ``` -### WithEnvironment +### WithScope -The `WithEnvironment` module can be included in visitors to automatically keep track of local variables and arguments -defined inside each environment. A `current_environment` accessor is made available to the request, allowing it to find -all usages and definitions of a local. 
+The `WithScope` module can be included in visitors to automatically keep track of local variables and arguments defined inside each scope. A `current_scope` accessor is made available to the request, allowing it to find all usages and definitions of a local. ```ruby class MyVisitor < Visitor - include WithEnvironment + prepend WithScope def visit_ident(node) # find_local will return a Local for any local variables or arguments # present in the current environment or nil if the identifier is not a local - local = current_environment.find_local(node) + local = current_scope.find_local(node) puts local.type # the type of the local (:variable or :argument) puts local.definitions # the array of locations where this local is defined @@ -658,6 +710,7 @@ To register plugins, define a file somewhere in your load path named `syntax_tre * `plugin/single_quotes` - This will change all of your string literals to use single quotes instead of the default double quotes. * `plugin/trailing_comma` - This will put trailing commas into multiline array literals, hash literals, and method calls that can support trailing commas. +* `plugin/disable_auto_ternary` - This will prevent the automatic conversion of `if ... else` to ternary expressions. If you're using Syntax Tree as a library, you can require those files directly or manually pass those options to the formatter initializer through the `SyntaxTree::Formatter::Options` class. @@ -763,6 +816,7 @@ inherit_gem: * [Neovim](https://neovim.io/) - [neovim/nvim-lspconfig](https://github.com/neovim/nvim-lspconfig). * [Vim](https://www.vim.org/) - [dense-analysis/ale](https://github.com/dense-analysis/ale). * [VSCode](https://code.visualstudio.com/) - [ruby-syntax-tree/vscode-syntax-tree](https://github.com/ruby-syntax-tree/vscode-syntax-tree). +* [Emacs](https://www.gnu.org/software/emacs/) - [emacs-format-all-the-code](https://github.com/lassik/emacs-format-all-the-code). 
## Contributing diff --git a/Rakefile b/Rakefile index 4973d45e..fb4f8847 100644 --- a/Rakefile +++ b/Rakefile @@ -4,6 +4,8 @@ require "bundler/gem_tasks" require "rake/testtask" require "syntax_tree/rake_tasks" +Rake.add_rakelib "tasks" + Rake::TestTask.new(:test) do |t| t.libs << "test" t.libs << "lib" @@ -14,7 +16,16 @@ task default: :test configure = ->(task) do task.source_files = - FileList[%w[Gemfile Rakefile syntax_tree.gemspec lib/**/*.rb test/*.rb]] + FileList[ + %w[ + Gemfile + Rakefile + syntax_tree.gemspec + lib/**/*.rb + tasks/*.rake + test/*.rb + ] + ] # Since Syntax Tree supports back to Ruby 2.7.0, we need to make sure that we # format our code such that it's compatible with that version. This actually diff --git a/bin/console b/bin/console index 1c18bd62..6f35f1ec 100755 --- a/bin/console +++ b/bin/console @@ -3,6 +3,7 @@ require "bundler/setup" require "syntax_tree" +require "syntax_tree/reflection" require "irb" IRB.start(__FILE__) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 1357e95f..90fb7fe7 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,48 +1,41 @@ # frozen_string_literal: true -require "etc" -require "fiddle" -require "json" -require "pp" require "prettier_print" +require "pp" require "ripper" -require "stringio" -require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" -require_relative "syntax_tree/dsl" -require_relative "syntax_tree/version" - require_relative "syntax_tree/basic_visitor" require_relative "syntax_tree/visitor" -require_relative "syntax_tree/visitor/field_visitor" -require_relative "syntax_tree/visitor/json_visitor" -require_relative "syntax_tree/visitor/match_visitor" -require_relative "syntax_tree/visitor/mutation_visitor" -require_relative "syntax_tree/visitor/pretty_print_visitor" -require_relative "syntax_tree/visitor/environment" -require_relative "syntax_tree/visitor/with_environment" +require_relative "syntax_tree/formatter" require_relative "syntax_tree/parser" 
-require_relative "syntax_tree/pattern" -require_relative "syntax_tree/search" - -require_relative "syntax_tree/yarv" -require_relative "syntax_tree/yarv/bf" -require_relative "syntax_tree/yarv/compiler" -require_relative "syntax_tree/yarv/decompiler" -require_relative "syntax_tree/yarv/disassembler" -require_relative "syntax_tree/yarv/instruction_sequence" -require_relative "syntax_tree/yarv/instructions" -require_relative "syntax_tree/yarv/legacy" -require_relative "syntax_tree/yarv/local_table" -require_relative "syntax_tree/yarv/assembler" +require_relative "syntax_tree/version" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. It can be used to # build formatters, linters, language servers, and more. module SyntaxTree + # Syntax Tree the library has many features that aren't always used by the + # CLI. Requiring those features takes time, so we autoload as many constants + # as possible in order to keep the CLI as fast as possible. + + autoload :Database, "syntax_tree/database" + autoload :DSL, "syntax_tree/dsl" + autoload :FieldVisitor, "syntax_tree/field_visitor" + autoload :Index, "syntax_tree/index" + autoload :JSONVisitor, "syntax_tree/json_visitor" + autoload :LanguageServer, "syntax_tree/language_server" + autoload :MatchVisitor, "syntax_tree/match_visitor" + autoload :Mermaid, "syntax_tree/mermaid" + autoload :MermaidVisitor, "syntax_tree/mermaid_visitor" + autoload :MutationVisitor, "syntax_tree/mutation_visitor" + autoload :Pattern, "syntax_tree/pattern" + autoload :PrettyPrintVisitor, "syntax_tree/pretty_print_visitor" + autoload :Search, "syntax_tree/search" + autoload :WithScope, "syntax_tree/with_scope" + # This holds references to objects that respond to both #parse and #format # so that we can use them in the CLI. 
HANDLERS = {} @@ -57,39 +50,84 @@ module SyntaxTree # It shouldn't really be changed except in very niche circumstances. DEFAULT_RUBY_VERSION = Formatter::SemanticVersion.new(RUBY_VERSION).freeze - # This is a hook provided so that plugins can register themselves as the - # handler for a particular file type. - def self.register_handler(extension, handler) - HANDLERS[extension] = handler + # The default indentation level for formatting. We allow changing this so + # that Syntax Tree can format arbitrary parts of a document. + DEFAULT_INDENTATION = 0 + + # Parses the given source and returns the formatted source. + def self.format( + source, + maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, + options: Formatter::Options.new + ) + format_node( + source, + parse(source), + maxwidth, + base_indentation, + options: options + ) end - # Parses the given source and returns the syntax tree. - def self.parse(source) - parser = Parser.new(source) - response = parser.parse - response unless parser.error? + # Parses the given file and returns the formatted source. + def self.format_file( + filepath, + maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, + options: Formatter::Options.new + ) + format(read(filepath), maxwidth, base_indentation, options: options) end - # Parses the given source and returns the formatted source. - def self.format( + # Accepts a node in the tree and returns the formatted source. + def self.format_node( source, + node, maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, options: Formatter::Options.new ) formatter = Formatter.new(source, [], maxwidth, options: options) - parse(source).format(formatter) + node.format(formatter) - formatter.flush + formatter.flush(base_indentation) formatter.output.join end + # Indexes the given source code to return a list of all class, module, and + # method definitions. 
Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index(source) + Index.index(source) + end + + # Indexes the given file to return a list of all class, module, and method + # definitions. Used to quickly provide indexing capability for IDEs or + # documentation generation. + def self.index_file(filepath) + Index.index_file(filepath) + end + # A convenience method for creating a new mutation visitor. def self.mutation - visitor = Visitor::MutationVisitor.new + visitor = MutationVisitor.new yield visitor visitor end + # Parses the given source and returns the syntax tree. + def self.parse(source) + parser = Parser.new(source) + response = parser.parse + response unless parser.error? + end + + # Parses the given file and returns the syntax tree. + def self.parse_file(filepath) + parse(read(filepath)) + end + # Returns the source from the given filepath taking into account any potential # magic encoding comments. def self.read(filepath) @@ -105,9 +143,24 @@ def self.read(filepath) File.read(filepath, encoding: encoding) end + # This is a hook provided so that plugins can register themselves as the + # handler for a particular file type. + def self.register_handler(extension, handler) + HANDLERS[extension] = handler + end + # Searches through the given source using the given pattern and yields each # node in the tree that matches the pattern to the given block. def self.search(source, query, &block) - Search.new(Pattern.new(query).compile).scan(parse(source), &block) + pattern = Pattern.new(query).compile + program = parse(source) + + Search.new(pattern).scan(program, &block) + end + + # Searches through the given file using the given pattern and yields each + # node in the tree that matches the pattern to the given block. 
+ def self.search_file(filepath, query, &block) + search(read(filepath), query, &block) end end diff --git a/lib/syntax_tree/basic_visitor.rb b/lib/syntax_tree/basic_visitor.rb index 34b7876e..bd8ea5f2 100644 --- a/lib/syntax_tree/basic_visitor.rb +++ b/lib/syntax_tree/basic_visitor.rb @@ -29,7 +29,7 @@ def initialize(error) def corrections @corrections ||= DidYouMean::SpellChecker.new( - dictionary: Visitor.visit_methods + dictionary: BasicVisitor.valid_visit_methods ).correct(visit_method) end @@ -40,7 +40,40 @@ def corrections end end + # This module is responsible for checking all of the methods defined within + # a given block to ensure that they are valid visit methods. + class VisitMethodsChecker < Module + Status = Struct.new(:checking) + + # This is the status of the checker. It's used to determine whether or not + # we should be checking the methods that are defined. It is kept as an + # instance variable so that it can be disabled later. + attr_reader :status + + def initialize + # We need the status to be an instance variable so that it can be + # accessed by the disable! method, but also a local variable so that it + # can be captured by the define_method block. + status = @status = Status.new(true) + + define_method(:method_added) do |name| + BasicVisitor.visit_method(name) if status.checking + super(name) + end + end + + def disable! + status.checking = false + end + end + class << self + # This is the list of all of the valid visit methods. + def valid_visit_methods + @valid_visit_methods ||= + Visitor.instance_methods.grep(/^visit_(?!child_nodes)/) + end + # This method is here to help folks write visitors. # # It's not always easy to ensure you're writing the correct method name in @@ -51,15 +84,21 @@ class << self # name. It will raise an error if the visit method you're defining isn't # actually a method on the parent visitor. 
def visit_method(method_name) - return if visit_methods.include?(method_name) + return if valid_visit_methods.include?(method_name) raise VisitMethodError, method_name end - # This is the list of all of the valid visit methods. + # This method is here to help folks write visitors. + # + # Within the given block, every method that is defined will be checked to + # ensure it's a valid visit method using the BasicVisitor::visit_method + # method defined above. def visit_methods - @visit_methods ||= - Visitor.instance_methods.grep(/^visit_(?!child_nodes)/) + checker = VisitMethodsChecker.new + extend(checker) + yield + checker.disable! end end diff --git a/lib/syntax_tree/cli.rb b/lib/syntax_tree/cli.rb index 392dd627..e3bac8f1 100644 --- a/lib/syntax_tree/cli.rb +++ b/lib/syntax_tree/cli.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "etc" require "optparse" module SyntaxTree @@ -62,12 +63,13 @@ def writable? class ScriptItem attr_reader :source - def initialize(source) + def initialize(source, extension) @source = source + @extension = extension end def handler - HANDLERS[".rb"] + HANDLERS[@extension] end def filepath @@ -81,8 +83,12 @@ def writable? # An item of work that correspond to the content passed in via stdin. class STDINItem + def initialize(extension) + @extension = extension + end + def handler - HANDLERS[".rb"] + HANDLERS[@extension] end def filepath @@ -153,6 +159,92 @@ def failure end end + # An action of the CLI that generates ctags for the given source. 
+ class CTags < Action + attr_reader :entries + + def initialize(options) + super + @entries = [] + end + + def run(item) + lines = item.source.lines(chomp: true) + + SyntaxTree + .index(item.source) + .each do |entry| + line = lines[entry.location.line - 1] + pattern = "/^#{line.gsub("\\", "\\\\\\\\").gsub("/", "\\/")}$/;\"" + + entries << case entry + when SyntaxTree::Index::ModuleDefinition + parts = [entry.name, item.filepath, pattern, "m"] + + if entry.nesting != [[entry.name]] + parts << "class:#{entry.nesting.flatten.tap(&:pop).join(".")}" + end + + parts.join("\t") + when SyntaxTree::Index::ClassDefinition + parts = [entry.name, item.filepath, pattern, "c"] + + if entry.nesting != [[entry.name]] + parts << "class:#{entry.nesting.flatten.tap(&:pop).join(".")}" + end + + unless entry.superclass.empty? + inherits = entry.superclass.join(".").delete_prefix(".") + parts << "inherits:#{inherits}" + end + + parts.join("\t") + when SyntaxTree::Index::MethodDefinition + parts = [entry.name, item.filepath, pattern, "f"] + + unless entry.nesting.empty? + parts << "class:#{entry.nesting.flatten.join(".")}" + end + + parts.join("\t") + when SyntaxTree::Index::SingletonMethodDefinition + parts = [entry.name, item.filepath, pattern, "F"] + + unless entry.nesting.empty? + parts << "class:#{entry.nesting.flatten.join(".")}" + end + + parts.join("\t") + when SyntaxTree::Index::AliasMethodDefinition + parts = [entry.name, item.filepath, pattern, "a"] + + unless entry.nesting.empty? + parts << "class:#{entry.nesting.flatten.join(".")}" + end + + parts.join("\t") + when SyntaxTree::Index::ConstantDefinition + parts = [entry.name, item.filepath, pattern, "C"] + + unless entry.nesting.empty? 
+ parts << "class:#{entry.nesting.flatten.join(".")}" + end + + parts.join("\t") + end + end + end + + def success + puts(<<~HEADER) + !_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ + !_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ + HEADER + + entries.sort.each { |entry| puts(entry) } + end + end + # An action of the CLI that formats the source twice to check if the first # format is not idempotent. class Debug < Action @@ -238,7 +330,7 @@ def run(item) # representation. class Json < Action def run(item) - object = Visitor::JSONVisitor.new.visit(item.handler.parse(item.source)) + object = item.handler.parse(item.source).accept(JSONVisitor.new) puts JSON.pretty_generate(object) end end @@ -303,10 +395,11 @@ def run(item) options.print_width, options: options.formatter_options ) + changed = source != formatted - File.write(filepath, formatted) if item.writable? + File.write(filepath, formatted) if item.writable? && changed - color = source == formatted ? Color.gray(filepath) : filepath + color = changed ? filepath : Color.gray(filepath) delta = ((Time.now - start) * 1000).round puts "#{color} #{delta}ms" @@ -325,6 +418,9 @@ def run(item) #{Color.bold("stree check [--plugins=...] [--print-width=NUMBER] [-e SCRIPT] FILE")} Check that the given files are formatted as syntax tree would format them + #{Color.bold("stree ctags [-e SCRIPT] FILE")} + Print out a ctags-compatible index of the given files + #{Color.bold("stree debug [--plugins=...] [--print-width=NUMBER] [-e SCRIPT] FILE")} Check that the given files can be formatted idempotently @@ -359,14 +455,26 @@ def run(item) #{Color.bold("stree write [--plugins=...] [--print-width=NUMBER] [-e SCRIPT] FILE")} Read, format, and write back the source of the given files + --ignore-files=... + A glob pattern to ignore files when processing. This can be specified + multiple times to ignore multiple patterns. + --plugins=... A comma-separated list of plugins to load. 
- --print-width=NUMBER + --print-width=... The maximum line width to use when formatting. - -e SCRIPT - Parse an inline Ruby string. + -e ... + Parse an inline string. + + --extension=... + A file extension matching the content passed in via STDIN or -e. + Defaults to '.rb'. + + --config=... + Path to a configuration file. Defaults to .streerc in the current + working directory. HELP # This represents all of the options that can be passed to the CLI. It is @@ -377,6 +485,7 @@ class Options :plugins, :print_width, :scripts, + :extension, :target_ruby_version def initialize @@ -384,6 +493,7 @@ def initialize @plugins = [] @print_width = DEFAULT_PRINT_WIDTH @scripts = [] + @extension = ".rb" @target_ruby_version = DEFAULT_RUBY_VERSION end @@ -432,6 +542,13 @@ def parser # it and add it to the list of scripts to run. opts.on("-e SCRIPT") { |script| @scripts << script } + # If there is a extension specified, then parse it and use it for + # STDIN and scripts. + opts.on("--extension=EXTENSION") do |extension| + # Both ".rb" and "rb" are going to work + @extension = ".#{extension.delete_prefix(".")}" + end + # If there is a target ruby version specified on the command line, # parse that out and use it when formatting. opts.on("--target-ruby-version=VERSION") do |version| @@ -455,8 +572,16 @@ class ConfigFile attr_reader :filepath - def initialize - @filepath = File.join(Dir.pwd, FILENAME) + def initialize(filepath = nil) + if filepath + if File.readable?(filepath) + @filepath = filepath + else + raise ArgumentError, "Invalid configuration file: #{filepath}" + end + else + @filepath = File.join(Dir.pwd, FILENAME) + end end def exists? @@ -474,8 +599,24 @@ class << self def run(argv) name, *arguments = argv - config_file = ConfigFile.new - arguments.unshift(*config_file.arguments) + # First, we need to check if there's a --config option specified + # so we can use the custom config file path. 
+ config_filepath = nil + arguments.each_with_index do |arg, index| + if arg.start_with?("--config=") + config_filepath = arg.split("=", 2)[1] + arguments.delete_at(index) + break + elsif arg == "--config" && arguments[index + 1] + config_filepath = arguments[index + 1] + arguments.delete_at(index + 1) + arguments.delete_at(index) + break + end + end + + config_file = ConfigFile.new(config_filepath) + arguments = config_file.arguments.concat(arguments) options = Options.new options.parse(arguments) @@ -486,6 +627,8 @@ def run(argv) AST.new(options) when "c", "check" Check.new(options) + when "ctags" + CTags.new(options) when "debug" Debug.new(options) when "doc" @@ -500,8 +643,10 @@ def run(argv) when "j", "json" Json.new(options) when "lsp" - require "syntax_tree/language_server" - LanguageServer.new(print_width: options.print_width).run + LanguageServer.new( + print_width: options.print_width, + ignore_files: options.ignore_files + ).run return 0 when "m", "match" Match.new(options) @@ -538,9 +683,11 @@ def run(argv) end end - options.scripts.each { |script| queue << ScriptItem.new(script) } + options.scripts.each do |script| + queue << ScriptItem.new(script, options.extension) + end else - queue << STDINItem.new + queue << STDINItem.new(options.extension) end # At the end, we're going to return whether or not this worker ever diff --git a/lib/syntax_tree/database.rb b/lib/syntax_tree/database.rb new file mode 100644 index 00000000..c9981f35 --- /dev/null +++ b/lib/syntax_tree/database.rb @@ -0,0 +1,331 @@ +# frozen_string_literal: true + +module SyntaxTree + # Provides the ability to index source files into a database, then query for + # the nodes. 
+ module Database + class IndexingVisitor < SyntaxTree::FieldVisitor + attr_reader :database, :filepath, :node_id + + def initialize(database, filepath) + @database = database + @filepath = filepath + @node_id = nil + end + + private + + def comments(node) + end + + def field(name, value) + return unless value.is_a?(SyntaxTree::Node) + + binds = [node_id, visit(value), name] + database.execute(<<~SQL, binds) + INSERT INTO edges (from_id, to_id, name) + VALUES (?, ?, ?) + SQL + end + + def list(name, values) + values.each_with_index do |value, index| + binds = [node_id, visit(value), name, index] + database.execute(<<~SQL, binds) + INSERT INTO edges (from_id, to_id, name, list_index) + VALUES (?, ?, ?, ?) + SQL + end + end + + def node(node, _name) + previous = node_id + binds = [ + node.class.name.delete_prefix("SyntaxTree::"), + filepath, + node.location.start_line, + node.location.start_column + ] + + database.execute(<<~SQL, binds) + INSERT INTO nodes (type, path, line, column) + VALUES (?, ?, ?, ?) + SQL + + begin + @node_id = database.last_insert_row_id + yield + @node_id + ensure + @node_id = previous + end + end + + def text(name, value) + end + + def pairs(name, values) + values.each_with_index do |(key, value), index| + binds = [node_id, visit(key), "#{name}[0]", index] + database.execute(<<~SQL, binds) + INSERT INTO edges (from_id, to_id, name, list_index) + VALUES (?, ?, ?, ?) + SQL + + binds = [node_id, visit(value), "#{name}[1]", index] + database.execute(<<~SQL, binds) + INSERT INTO edges (from_id, to_id, name, list_index) + VALUES (?, ?, ?, ?) + SQL + end + end + end + + # Query for a specific type of node. + class TypeQuery + attr_reader :type + + def initialize(type) + @type = type + end + + def each(database, &block) + sql = "SELECT * FROM nodes WHERE type = ?" + database.execute(sql, type).each(&block) + end + end + + # Query for the attributes of a node, optionally also filtering by type. 
+ class AttrQuery + attr_reader :type, :attrs + + def initialize(type, attrs) + @type = type + @attrs = attrs + end + + def each(database, &block) + joins = [] + binds = [] + + attrs.each do |name, query| + ids = query.each(database).map { |row| row[0] } + joins << <<~SQL + JOIN edges AS #{name} + ON #{name}.from_id = nodes.id + AND #{name}.name = ? + AND #{name}.to_id IN (#{(["?"] * ids.size).join(", ")}) + SQL + + binds.push(name).concat(ids) + end + + sql = +"SELECT nodes.* FROM nodes, edges #{joins.join(" ")}" + + if type + sql << " WHERE nodes.type = ?" + binds << type + end + + sql << " GROUP BY nodes.id" + database.execute(sql, binds).each(&block) + end + end + + # Query for the results of either query. + class OrQuery + attr_reader :left, :right + + def initialize(left, right) + @left = left + @right = right + end + + def each(database, &block) + left.each(database, &block) + right.each(database, &block) + end + end + + # A lazy query result. + class QueryResult + attr_reader :database, :query + + def initialize(database, query) + @database = database + @query = query + end + + def each(&block) + return enum_for(__method__) unless block_given? + query.each(database, &block) + end + end + + # A pattern matching expression that will be compiled into a query. + class Pattern + class CompilationError < StandardError + end + + attr_reader :query + + def initialize(query) + @query = query + end + + def compile + program = + begin + SyntaxTree.parse("case nil\nin #{query}\nend") + rescue Parser::ParseError + raise CompilationError, query + end + + compile_node(program.statements.body.first.consequent.pattern) + end + + private + + def compile_error(node) + raise CompilationError, PP.pp(node, +"").chomp + end + + # Shortcut for combining two queries into one that returns the results of + # if either query matches. 
+ def combine_or(left, right) + OrQuery.new(left, right) + end + + # in foo | bar + def compile_binary(node) + compile_error(node) if node.operator != :| + + combine_or(compile_node(node.left), compile_node(node.right)) + end + + # in Ident + def compile_const(node) + value = node.value + + if SyntaxTree.const_defined?(value, false) + clazz = SyntaxTree.const_get(value) + TypeQuery.new(clazz.name.delete_prefix("SyntaxTree::")) + else + compile_error(node) + end + end + + # in SyntaxTree::Ident + def compile_const_path_ref(node) + parent = node.parent + if !parent.is_a?(SyntaxTree::VarRef) || + !parent.value.is_a?(SyntaxTree::Const) + compile_error(node) + end + + if parent.value.value == "SyntaxTree" + compile_node(node.constant) + else + compile_error(node) + end + end + + # in Ident[value: String] + def compile_hshptn(node) + compile_error(node) unless node.keyword_rest.nil? + + attrs = {} + node.keywords.each do |keyword, value| + compile_error(node) unless keyword.is_a?(SyntaxTree::Label) + attrs[keyword.value.chomp(":")] = compile_node(value) + end + + type = node.constant ? 
compile_node(node.constant).type : nil + AttrQuery.new(type, attrs) + end + + # in Foo + def compile_var_ref(node) + value = node.value + + if value.is_a?(SyntaxTree::Const) + compile_node(value) + else + compile_error(node) + end + end + + def compile_node(node) + case node + when SyntaxTree::Binary + compile_binary(node) + when SyntaxTree::Const + compile_const(node) + when SyntaxTree::ConstPathRef + compile_const_path_ref(node) + when SyntaxTree::HshPtn + compile_hshptn(node) + when SyntaxTree::VarRef + compile_var_ref(node) + else + compile_error(node) + end + end + end + + class Connection + attr_reader :raw_connection + + def initialize(raw_connection) + @raw_connection = raw_connection + end + + def execute(query, binds = []) + raw_connection.execute(query, binds) + end + + def index_file(filepath) + program = SyntaxTree.parse(SyntaxTree.read(filepath)) + program.accept(IndexingVisitor.new(self, filepath)) + end + + def last_insert_row_id + raw_connection.last_insert_row_id + end + + def prepare + raw_connection.execute(<<~SQL) + CREATE TABLE nodes ( + id integer primary key, + type varchar(20), + path varchar(200), + line integer, + column integer + ); + SQL + + raw_connection.execute(<<~SQL) + CREATE INDEX nodes_type ON nodes (type); + SQL + + raw_connection.execute(<<~SQL) + CREATE TABLE edges ( + id integer primary key, + from_id integer, + to_id integer, + name varchar(20), + list_index integer + ); + SQL + + raw_connection.execute(<<~SQL) + CREATE INDEX edges_name ON edges (name); + SQL + end + + def search(query) + QueryResult.new(self, Pattern.new(query).compile) + end + end + end +end diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb index 860a1fe5..4506aa04 100644 --- a/lib/syntax_tree/dsl.rb +++ b/lib/syntax_tree/dsl.rb @@ -210,12 +210,17 @@ def RAssign(value, operator, pattern) end # Create a new ClassDeclaration node. 
- def ClassDeclaration(constant, superclass, bodystmt) + def ClassDeclaration( + constant, + superclass, + bodystmt, + location = Location.default + ) ClassDeclaration.new( constant: constant, superclass: superclass, bodystmt: bodystmt, - location: Location.default + location: location ) end @@ -225,12 +230,12 @@ def Comma(value) end # Create a new Command node. - def Command(message, arguments, block) + def Command(message, arguments, block, location = Location.default) Command.new( message: message, arguments: arguments, block: block, - location: Location.default + location: location ) end @@ -247,8 +252,8 @@ def CommandCall(receiver, operator, message, arguments, block) end # Create a new Comment node. - def Comment(value, inline) - Comment.new(value: value, inline: inline, location: Location.default) + def Comment(value, inline, location = Location.default) + Comment.new(value: value, inline: inline, location: location) end # Create a new Const node. @@ -285,14 +290,21 @@ def CVar(value) end # Create a new DefNode node. - def DefNode(target, operator, name, params, bodystmt) + def DefNode( + target, + operator, + name, + params, + bodystmt, + location = Location.default + ) DefNode.new( target: target, operator: operator, name: name, params: params, bodystmt: bodystmt, - location: Location.default + location: location ) end @@ -565,8 +577,8 @@ def MAssign(target, value) end # Create a new MethodAddBlock node. - def MethodAddBlock(call, block) - MethodAddBlock.new(call: call, block: block, location: Location.default) + def MethodAddBlock(call, block, location = Location.default) + MethodAddBlock.new(call: call, block: block, location: location) end # Create a new MLHS node. @@ -779,7 +791,7 @@ def SClass(target, bodystmt) # Create a new Statements node. def Statements(body) - Statements.new(nil, body: body, location: Location.default) + Statements.new(body: body, location: Location.default) end # Create a new StringContent node. 
diff --git a/lib/syntax_tree/visitor/field_visitor.rb b/lib/syntax_tree/field_visitor.rb similarity index 91% rename from lib/syntax_tree/visitor/field_visitor.rb rename to lib/syntax_tree/field_visitor.rb index 6e643e09..f5607c67 100644 --- a/lib/syntax_tree/visitor/field_visitor.rb +++ b/lib/syntax_tree/field_visitor.rb @@ -1,55 +1,54 @@ # frozen_string_literal: true module SyntaxTree - class Visitor - # This is the parent class of a lot of built-in visitors for Syntax Tree. It - # reflects visiting each of the fields on every node in turn. It itself does - # not do anything with these fields, it leaves that behavior up to the - # subclass to implement. - # - # In order to properly use this class, you will need to subclass it and - # implement #comments, #field, #list, #node, #pairs, and #text. Those are - # documented here. - # - # == comments(node) - # - # This accepts the node that is being visited and does something depending - # on the comments attached to the node. - # - # == field(name, value) - # - # This accepts the name of the field being visited as a string (like - # "value") and the actual value of that field. The value can be a subclass - # of Node or any other type that can be held within the tree. - # - # == list(name, values) - # - # This accepts the name of the field being visited as well as a list of - # values. This is used, for example, when visiting something like the body - # of a Statements node. - # - # == node(name, node) - # - # This is the parent serialization method for each node. It is called with - # the node itself, as well as the type of the node as a string. The type - # is an internally used value that usually resembles the name of the - # ripper event that generated the node. The method should yield to the - # given block which then calls through to visit each of the fields on the - # node. 
- # - # == text(name, value) - # - # This accepts the name of the field being visited as well as a string - # value representing the value of the field. - # - # == pairs(name, values) - # - # This accepts the name of the field being visited as well as a list of - # pairs that represent the value of the field. It is used only in a couple - # of circumstances, like when visiting the list of optional parameters - # defined on a method. - # - class FieldVisitor < BasicVisitor + # This is the parent class of a lot of built-in visitors for Syntax Tree. It + # reflects visiting each of the fields on every node in turn. It itself does + # not do anything with these fields, it leaves that behavior up to the + # subclass to implement. + # + # In order to properly use this class, you will need to subclass it and + # implement #comments, #field, #list, #node, #pairs, and #text. Those are + # documented here. + # + # == comments(node) + # + # This accepts the node that is being visited and does something depending on + # the comments attached to the node. + # + # == field(name, value) + # + # This accepts the name of the field being visited as a string (like "value") + # and the actual value of that field. The value can be a subclass of Node or + # any other type that can be held within the tree. + # + # == list(name, values) + # + # This accepts the name of the field being visited as well as a list of + # values. This is used, for example, when visiting something like the body of + # a Statements node. + # + # == node(name, node) + # + # This is the parent serialization method for each node. It is called with the + # node itself, as well as the type of the node as a string. The type is an + # internally used value that usually resembles the name of the ripper event + # that generated the node. The method should yield to the given block which + # then calls through to visit each of the fields on the node. 
+ # + # == text(name, value) + # + # This accepts the name of the field being visited as well as a string value + # representing the value of the field. + # + # == pairs(name, values) + # + # This accepts the name of the field being visited as well as a list of pairs + # that represent the value of the field. It is used only in a couple of + # circumstances, like when visiting the list of optional parameters defined on + # a method. + # + class FieldVisitor < BasicVisitor + visit_methods do def visit_aref(node) node(node, "aref") do field("collection", node.collection) @@ -264,6 +263,7 @@ def visit_command(node) node(node, "command") do field("message", node.message) field("arguments", node.arguments) + field("block", node.block) if node.block comments(node) end end @@ -274,6 +274,7 @@ def visit_command_call(node) field("operator", node.operator) field("message", node.message) field("arguments", node.arguments) if node.arguments + field("block", node.block) if node.block comments(node) end end @@ -1017,14 +1018,14 @@ def visit_zsuper(node) def visit___end__(node) visit_token(node, "__end__") end + end - private + private - def visit_token(node, type) - node(node, type) do - field("value", node.value) - comments(node) - end + def visit_token(node, type) + node(node, type) do + field("value", node.value) + comments(node) end end end diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index d5d251c6..2b229885 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -21,11 +21,15 @@ def initialize(version) # that folks have become entrenched in their ways, we decided to provide a # small amount of configurability. 
class Options - attr_reader :quote, :trailing_comma, :target_ruby_version + attr_reader :quote, + :trailing_comma, + :disable_auto_ternary, + :target_ruby_version def initialize( quote: :default, trailing_comma: :default, + disable_auto_ternary: :default, target_ruby_version: :default ) @quote = @@ -50,6 +54,17 @@ def initialize( trailing_comma end + @disable_auto_ternary = + if disable_auto_ternary == :default + # We ship with a disable ternary plugin that will define this + # constant. That constant is responsible for determining the default + # disable ternary value. If it's defined, then we default to true. + # Otherwise we default to false. + defined?(DISABLE_AUTO_TERNARY) + else + disable_auto_ternary + end + @target_ruby_version = if target_ruby_version == :default # The default target Ruby version is the current version of Ruby. @@ -69,8 +84,13 @@ def initialize( # These options are overridden in plugins to we need to make sure they are # available here. - attr_reader :quote, :trailing_comma, :target_ruby_version + attr_reader :quote, + :trailing_comma, + :disable_auto_ternary, + :target_ruby_version + alias trailing_comma? trailing_comma + alias disable_auto_ternary? disable_auto_ternary def initialize(source, *args, options: Options.new) super(*args) @@ -81,13 +101,14 @@ def initialize(source, *args, options: Options.new) # Memoizing these values to make access faster. @quote = options.quote @trailing_comma = options.trailing_comma + @disable_auto_ternary = options.disable_auto_ternary @target_ruby_version = options.target_ruby_version end - def self.format(source, node) + def self.format(source, node, base_indentation = 0) q = new(source, []) q.format(node) - q.flush + q.flush(base_indentation) q.output.join end @@ -117,7 +138,7 @@ def format(node, stackable: true) # going to just print out the node as it was seen in the source. doc = if last_leading&.ignore? 
- range = source[node.location.start_char...node.location.end_char] + range = source[node.start_char...node.end_char] first = true range.each_line(chomp: true) do |line| diff --git a/lib/syntax_tree/index.rb b/lib/syntax_tree/index.rb new file mode 100644 index 00000000..0280749f --- /dev/null +++ b/lib/syntax_tree/index.rb @@ -0,0 +1,683 @@ +# frozen_string_literal: true + +module SyntaxTree + # This class can be used to build an index of the structure of Ruby files. We + # define an index as the list of constants and methods defined within a file. + # + # This index strives to be as fast as possible to better support tools like + # IDEs. Because of that, it has different backends depending on what + # functionality is available. + module Index + # This is a location for an index entry. + class Location + attr_reader :line, :column + + def initialize(line, column) + @line = line + @column = column + end + end + + # This entry represents a class definition using the class keyword. + class ClassDefinition + attr_reader :nesting, :name, :superclass, :location, :comments + + def initialize(nesting, name, superclass, location, comments) + @nesting = nesting + @name = name + @superclass = superclass + @location = location + @comments = comments + end + end + + # This entry represents a constant assignment. + class ConstantDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # This entry represents a module definition using the module keyword. + class ModuleDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # This entry represents a method definition using the def keyword. 
+ class MethodDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # This entry represents a singleton method definition using the def keyword + # with a specified target. + class SingletonMethodDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # This entry represents a method definition that was created using the alias + # keyword. + class AliasMethodDefinition + attr_reader :nesting, :name, :location, :comments + + def initialize(nesting, name, location, comments) + @nesting = nesting + @name = name + @location = location + @comments = comments + end + end + + # When you're using the instruction sequence backend, this class is used to + # lazily parse comments out of the source code. + class FileComments + # We use the ripper library to pull out source comments. + class Parser < Ripper + attr_reader :comments + + def initialize(*) + super + @comments = {} + end + + def on_comment(value) + comments[lineno] = value.chomp + end + end + + # This represents the Ruby source in the form of a file. When it needs to + # be read we'll read the file. + class FileSource + attr_reader :filepath + + def initialize(filepath) + @filepath = filepath + end + + def source + File.read(filepath) + end + end + + # This represents the Ruby source in the form of a string. When it needs + # to be read the string is returned. 
+ class StringSource + attr_reader :source + + def initialize(source) + @source = source + end + end + + attr_reader :source + + def initialize(source) + @source = source + end + + def comments + @comments ||= Parser.new(source.source).tap(&:parse).comments + end + end + + # This class handles parsing comments from Ruby source code in the case that + # we use the instruction sequence backend. Because the instruction sequence + # backend doesn't provide comments (since they are dropped) we provide this + # interface to lazily parse them out. + class EntryComments + include Enumerable + attr_reader :file_comments, :location + + def initialize(file_comments, location) + @file_comments = file_comments + @location = location + end + + def each(&block) + line = location.line - 1 + result = [] + + while line >= 0 && (comment = file_comments.comments[line]) + result.unshift(comment) + line -= 1 + end + + result.each(&block) + end + end + + # This backend creates the index using RubyVM::InstructionSequence, which is + # faster than using the Syntax Tree parser, but is not available on all + # runtimes. 
+ class ISeqBackend + VM_DEFINECLASS_TYPE_CLASS = 0x00 + VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 0x01 + VM_DEFINECLASS_TYPE_MODULE = 0x02 + VM_DEFINECLASS_FLAG_SCOPED = 0x08 + VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 0x10 + + def index(source) + index_iseq( + RubyVM::InstructionSequence.compile(source).to_a, + FileComments.new(FileComments::StringSource.new(source)) + ) + end + + def index_file(filepath) + index_iseq( + RubyVM::InstructionSequence.compile_file(filepath).to_a, + FileComments.new(FileComments::FileSource.new(filepath)) + ) + end + + private + + def location_for(iseq) + code_location = iseq[4][:code_location] + Location.new(code_location[0], code_location[1]) + end + + def find_constant_path(insns, index) + index -= 1 while index >= 0 && + ( + insns[index].is_a?(Integer) || + ( + insns[index].is_a?(Array) && + %i[swap topn].include?(insns[index][0]) + ) + ) + insn = insns[index] + + if insn.is_a?(Array) && insn[0] == :opt_getconstant_path + # In this case we're on Ruby 3.2+ and we have an opt_getconstant_path + # instruction, so we already know all of the symbols in the nesting. + [index - 1, insn[1]] + elsif insn.is_a?(Symbol) && insn.match?(/\Alabel_\d+/) + # Otherwise, if we have a label then this is very likely the + # destination of an opt_getinlinecache instruction, in which case + # we'll walk backwards to grab up all of the constants. 
+ names = [] + + index -= 1 + until insns[index].is_a?(Array) && + insns[index][0] == :opt_getinlinecache + if insns[index].is_a?(Array) && insns[index][0] == :getconstant + names.unshift(insns[index][1]) + end + + index -= 1 + end + + [index - 1, names] + else + [index, []] + end + end + + def find_attr_arguments(insns, index) + orig_argc = insns[index][1][:orig_argc] + names = [] + + current = index - 1 + while current >= 0 && names.length < orig_argc + if insns[current].is_a?(Array) && insns[current][0] == :putobject + names.unshift(insns[current][1]) + end + + current -= 1 + end + + names if insns[current] == [:putself] && names.length == orig_argc + end + + def method_definition(nesting, name, location, file_comments) + comments = EntryComments.new(file_comments, location) + + if nesting.last == [:singletonclass] + SingletonMethodDefinition.new( + nesting[0...-1], + name, + location, + comments + ) + else + MethodDefinition.new(nesting, name, location, comments) + end + end + + def index_iseq(iseq, file_comments) + results = [] + queue = [[iseq, []]] + + while (current_iseq, current_nesting = queue.shift) + file = current_iseq[5] + line = current_iseq[8] + insns = current_iseq[13] + + insns.each_with_index do |insn, index| + case insn + when Integer + line = insn + next + when Array + # continue on + else + # skip everything else + next + end + + case insn[0] + when :defineclass + _, name, class_iseq, flags = insn + next_nesting = current_nesting.dup + + # This is the index we're going to search for the nested constant + # path within the declaration name. + constant_index = index - 2 + + # This is the superclass of the class being defined. + superclass = [] + + # If there is a superclass, then we're going to find it here and + # then update the constant_index as necessary. + if flags & VM_DEFINECLASS_FLAG_HAS_SUPERCLASS > 0 + constant_index, superclass = + find_constant_path(insns, index - 1) + + if superclass.empty? 
+ warn("#{file}:#{line}: superclass with non constant path") + next + end + end + + if (_, nesting = find_constant_path(insns, constant_index)) + # If there is a constant path in the class name, then we need to + # handle that by updating the nesting. + next_nesting << (nesting << name) + else + # Otherwise we'll add the class name to the nesting. + next_nesting << [name] + end + + if flags == VM_DEFINECLASS_TYPE_SINGLETON_CLASS + # At the moment, we don't support singletons that aren't + # defined on self. We could, but it would require more + # emulation. + if insns[index - 2] != [:putself] + warn( + "#{file}:#{line}: singleton class with non-self receiver" + ) + next + end + elsif flags & VM_DEFINECLASS_TYPE_MODULE > 0 + location = location_for(class_iseq) + results << ModuleDefinition.new( + next_nesting, + name, + location, + EntryComments.new(file_comments, location) + ) + else + location = location_for(class_iseq) + results << ClassDefinition.new( + next_nesting, + name, + superclass, + location, + EntryComments.new(file_comments, location) + ) + end + + queue << [class_iseq, next_nesting] + when :definemethod + location = location_for(insn[2]) + results << method_definition( + current_nesting, + insn[1], + location, + file_comments + ) + when :definesmethod + if insns[index - 1] != [:putself] + warn("#{file}:#{line}: singleton method with non-self receiver") + next + end + + location = location_for(insn[2]) + results << SingletonMethodDefinition.new( + current_nesting, + insn[1], + location, + EntryComments.new(file_comments, location) + ) + when :setconstant + next_nesting = current_nesting.dup + name = insn[1] + + _, nesting = find_constant_path(insns, index - 1) + next_nesting << nesting if nesting.any? 
+ + location = Location.new(line, :unknown) + results << ConstantDefinition.new( + next_nesting, + name, + location, + EntryComments.new(file_comments, location) + ) + when :opt_send_without_block, :send + case insn[1][:mid] + when :attr_reader, :attr_writer, :attr_accessor + attr_names = find_attr_arguments(insns, index) + next unless attr_names + + location = Location.new(line, :unknown) + attr_names.each do |attr_name| + if insn[1][:mid] != :attr_writer + results << method_definition( + current_nesting, + attr_name, + location, + file_comments + ) + end + + if insn[1][:mid] != :attr_reader + results << method_definition( + current_nesting, + :"#{attr_name}=", + location, + file_comments + ) + end + end + when :"core#set_method_alias" + # Now we have to validate that the alias is happening with a + # non-interpolated value. To do this we'll match the specific + # pattern we're expecting. + values = + insns[(index - 4)...index].map do |previous| + previous.is_a?(Array) ? previous[0] : previous + end + if values != + %i[putspecialobject putspecialobject putobject putobject] + next + end + + # Now that we know it's in the structure we want it, we can use + # the values of the putobject to determine the alias. + location = Location.new(line, :unknown) + results << AliasMethodDefinition.new( + current_nesting, + insns[index - 2][1], + location, + EntryComments.new(file_comments, location) + ) + end + end + end + end + + results + end + end + + # This backend creates the index using the Syntax Tree parser and a visitor. + # It is not as fast as using the instruction sequences directly, but is + # supported on all runtimes. 
+ class ParserBackend + class ConstantNameVisitor < Visitor + def visit_const_ref(node) + [node.constant.value.to_sym] + end + + def visit_const_path_ref(node) + visit(node.parent) << node.constant.value.to_sym + end + + def visit_var_ref(node) + [node.value.value.to_sym] + end + end + + class IndexVisitor < Visitor + attr_reader :results, :nesting, :statements + + def initialize + @results = [] + @nesting = [] + @statements = nil + end + + visit_methods do + def visit_alias(node) + if node.left.is_a?(SymbolLiteral) && node.right.is_a?(SymbolLiteral) + location = + Location.new( + node.location.start_line, + node.location.start_column + ) + + results << AliasMethodDefinition.new( + nesting.dup, + node.left.value.value.to_sym, + location, + comments_for(node) + ) + end + + super + end + + def visit_assign(node) + if node.target.is_a?(VarField) && node.target.value.is_a?(Const) + location = + Location.new( + node.location.start_line, + node.location.start_column + ) + + results << ConstantDefinition.new( + nesting.dup, + node.target.value.value.to_sym, + location, + comments_for(node) + ) + end + + super + end + + def visit_class(node) + names = node.constant.accept(ConstantNameVisitor.new) + nesting << names + + location = + Location.new(node.location.start_line, node.location.start_column) + + superclass = + if node.superclass + visited = node.superclass.accept(ConstantNameVisitor.new) + + if visited == [[]] + raise NotImplementedError, "superclass with non constant path" + end + + visited + else + [] + end + + results << ClassDefinition.new( + nesting.dup, + names.last, + superclass, + location, + comments_for(node) + ) + + super + nesting.pop + end + + def visit_command(node) + case node.message.value + when "attr_reader", "attr_writer", "attr_accessor" + comments = comments_for(node) + location = + Location.new( + node.location.start_line, + node.location.start_column + ) + + node.arguments.parts.each do |argument| + next unless argument.is_a?(SymbolLiteral) + 
name = argument.value.value.to_sym + + if node.message.value != "attr_writer" + results << MethodDefinition.new( + nesting.dup, + name, + location, + comments + ) + end + + if node.message.value != "attr_reader" + results << MethodDefinition.new( + nesting.dup, + :"#{name}=", + location, + comments + ) + end + end + end + + super + end + + def visit_def(node) + name = node.name.value.to_sym + location = + Location.new(node.location.start_line, node.location.start_column) + + results << if node.target.nil? + MethodDefinition.new( + nesting.dup, + name, + location, + comments_for(node) + ) + else + SingletonMethodDefinition.new( + nesting.dup, + name, + location, + comments_for(node) + ) + end + + super + end + + def visit_module(node) + names = node.constant.accept(ConstantNameVisitor.new) + nesting << names + + location = + Location.new(node.location.start_line, node.location.start_column) + + results << ModuleDefinition.new( + nesting.dup, + names.last, + location, + comments_for(node) + ) + + super + nesting.pop + end + + def visit_program(node) + super + results + end + + def visit_statements(node) + @statements = node + super + end + end + + private + + def comments_for(node) + comments = [] + + body = statements.body + line = node.location.start_line - 1 + index = body.index(node) + return comments if index.nil? + + index -= 1 + while index >= 0 && body[index].is_a?(Comment) && + (line - body[index].location.start_line < 2) + comments.unshift(body[index].value) + line = body[index].location.start_line + index -= 1 + end + + comments + end + end + + def index(source) + SyntaxTree.parse(source).accept(IndexVisitor.new) + end + + def index_file(filepath) + index(SyntaxTree.read(filepath)) + end + end + + # The class defined here is used to perform the indexing, depending on what + # functionality is available from the runtime. + INDEX_BACKEND = + defined?(RubyVM::InstructionSequence) ? 
ISeqBackend : ParserBackend + + # This method accepts source code and then indexes it. + def self.index(source, backend: INDEX_BACKEND.new) + backend.index(source) + end + + # This method accepts a filepath and then indexes it. + def self.index_file(filepath, backend: INDEX_BACKEND.new) + backend.index_file(filepath) + end + end +end diff --git a/lib/syntax_tree/json_visitor.rb b/lib/syntax_tree/json_visitor.rb new file mode 100644 index 00000000..7ad3fba0 --- /dev/null +++ b/lib/syntax_tree/json_visitor.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +require "json" + +module SyntaxTree + # This visitor transforms the AST into a hash that contains only primitives + # that can be easily serialized into JSON. + class JSONVisitor < FieldVisitor + attr_reader :target + + def initialize + @target = nil + end + + private + + def comments(node) + target[:comments] = visit_all(node.comments) + end + + def field(name, value) + target[name] = value.is_a?(Node) ? visit(value) : value + end + + def list(name, values) + target[name] = visit_all(values) + end + + def node(node, type) + previous = @target + @target = { type: type, location: visit_location(node.location) } + yield + @target + ensure + @target = previous + end + + def pairs(name, values) + target[name] = values.map { |(key, value)| [visit(key), visit(value)] } + end + + def text(name, value) + target[name] = value + end + + def visit_location(location) + [ + location.start_line, + location.start_char, + location.end_line, + location.end_char + ] + end + end +end diff --git a/lib/syntax_tree/language_server.rb b/lib/syntax_tree/language_server.rb index c2265c32..aaa64e9a 100644 --- a/lib/syntax_tree/language_server.rb +++ b/lib/syntax_tree/language_server.rb @@ -2,10 +2,9 @@ require "cgi" require "json" +require "pp" require "uri" -require_relative "language_server/inlay_hints" - module SyntaxTree # Syntax Tree additionally ships with a language server conforming to the # language server protocol. 
It can be invoked through the CLI by running: @@ -13,6 +12,162 @@ module SyntaxTree # stree lsp # class LanguageServer + # This class provides inlay hints for the language server. For more + # information, see the spec here: + # https://github.com/microsoft/language-server-protocol/issues/956. + class InlayHints < Visitor + # This represents a hint that is going to be displayed in the editor. + class Hint + attr_reader :line, :character, :label + + def initialize(line:, character:, label:) + @line = line + @character = character + @label = label + end + + # This is the shape that the LSP expects. + def to_json(*opts) + { + position: { + line: line, + character: character + }, + label: label + }.to_json(*opts) + end + end + + attr_reader :stack, :hints + + def initialize + @stack = [] + @hints = [] + end + + def visit(node) + stack << node + result = super + stack.pop + result + end + + visit_methods do + # Adds parentheses around assignments contained within the default + # values of parameters. For example, + # + # def foo(a = b = c) + # end + # + # becomes + # + # def foo(a = ₍b = c₎) + # end + # + def visit_assign(node) + parentheses(node.location) if stack[-2].is_a?(Params) + super + end + + # Adds parentheses around binary expressions to make it clear which + # subexpression will be evaluated first. For example, + # + # a + b * c + # + # becomes + # + # a + ₍b * c₎ + # + def visit_binary(node) + case stack[-2] + when Assign, OpAssign + parentheses(node.location) + when Binary + parentheses(node.location) if stack[-2].operator != node.operator + end + + super + end + + # Adds parentheses around ternary operators contained within certain + # expressions where it could be confusing which subexpression will get + # evaluated first. For example, + # + # a ? b : c ? d : e + # + # becomes + # + # a ? b : ₍c ? 
d : e₎ + # + def visit_if_op(node) + case stack[-2] + when Assign, Binary, IfOp, OpAssign + parentheses(node.location) + end + + super + end + + # Adds the implicitly rescued StandardError into a bare rescue clause. + # For example, + # + # begin + # rescue + # end + # + # becomes + # + # begin + # rescue StandardError + # end + # + def visit_rescue(node) + if node.exception.nil? + hints << Hint.new( + line: node.location.start_line - 1, + character: node.location.start_column + "rescue".length, + label: " StandardError" + ) + end + + super + end + + # Adds parentheses around unary statements using the - operator that are + # contained within Binary nodes. For example, + # + # -a + b + # + # becomes + # + # ₍-a₎ + b + # + def visit_unary(node) + if stack[-2].is_a?(Binary) && (node.operator == "-") + parentheses(node.location) + end + + super + end + end + + private + + def parentheses(location) + hints << Hint.new( + line: location.start_line - 1, + character: location.start_column, + label: "₍" + ) + + hints << Hint.new( + line: location.end_line - 1, + character: location.end_column, + label: "₎" + ) + end + end + # This is a small module that effectively mirrors pattern matching. We're # using it so that we can support truffleruby without having to ignore the # language server. @@ -62,11 +217,13 @@ def self.[](value) def initialize( input: $stdin, output: $stdout, - print_width: DEFAULT_PRINT_WIDTH + print_width: DEFAULT_PRINT_WIDTH, + ignore_files: [] ) @input = input.binmode @output = output.binmode @print_width = print_width + @ignore_files = ignore_files end # rubocop:disable Layout/LineLength @@ -100,8 +257,12 @@ def run store.delete(request.dig(:params, :textDocument, :uri)) when Request[method: "textDocument/formatting", id: :any, params: { textDocument: { uri: :any } }] uri = request.dig(:params, :textDocument, :uri) + filepath = uri.split("///").last + ignore = @ignore_files.any? 
do |glob| + File.fnmatch(glob, filepath) + end contents = store[uri] - write(id: request[:id], result: contents ? format(contents, uri.split(".").last) : nil) + write(id: request[:id], result: contents && !ignore ? format(contents, uri.split(".").last) : nil) when Request[method: "textDocument/inlayHint", id: :any, params: { textDocument: { uri: :any } }] uri = request.dig(:params, :textDocument, :uri) contents = store[uri] @@ -111,6 +272,8 @@ def run write(id: request[:id], result: PP.pp(SyntaxTree.parse(store[uri]), +"")) when Request[method: %r{\$/.+}] # ignored + when Request[method: "textDocument/documentColor", params: { textDocument: { uri: :any } }] + # ignored else raise ArgumentError, "Unhandled: #{request}" end diff --git a/lib/syntax_tree/language_server/inlay_hints.rb b/lib/syntax_tree/language_server/inlay_hints.rb deleted file mode 100644 index dfd63b8d..00000000 --- a/lib/syntax_tree/language_server/inlay_hints.rb +++ /dev/null @@ -1,159 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class LanguageServer - # This class provides inlay hints for the language server. For more - # information, see the spec here: - # https://github.com/microsoft/language-server-protocol/issues/956. - class InlayHints < Visitor - # This represents a hint that is going to be displayed in the editor. - class Hint - attr_reader :line, :character, :label - - def initialize(line:, character:, label:) - @line = line - @character = character - @label = label - end - - # This is the shape that the LSP expects. - def to_json(*opts) - { - position: { - line: line, - character: character - }, - label: label - }.to_json(*opts) - end - end - - attr_reader :stack, :hints - - def initialize - @stack = [] - @hints = [] - end - - def visit(node) - stack << node - result = super - stack.pop - result - end - - # Adds parentheses around assignments contained within the default values - # of parameters. 
For example, - # - # def foo(a = b = c) - # end - # - # becomes - # - # def foo(a = ₍b = c₎) - # end - # - def visit_assign(node) - parentheses(node.location) if stack[-2].is_a?(Params) - super - end - - # Adds parentheses around binary expressions to make it clear which - # subexpression will be evaluated first. For example, - # - # a + b * c - # - # becomes - # - # a + ₍b * c₎ - # - def visit_binary(node) - case stack[-2] - when Assign, OpAssign - parentheses(node.location) - when Binary - parentheses(node.location) if stack[-2].operator != node.operator - end - - super - end - - # Adds parentheses around ternary operators contained within certain - # expressions where it could be confusing which subexpression will get - # evaluated first. For example, - # - # a ? b : c ? d : e - # - # becomes - # - # a ? b : ₍c ? d : e₎ - # - def visit_if_op(node) - case stack[-2] - when Assign, Binary, IfOp, OpAssign - parentheses(node.location) - end - - super - end - - # Adds the implicitly rescued StandardError into a bare rescue clause. For - # example, - # - # begin - # rescue - # end - # - # becomes - # - # begin - # rescue StandardError - # end - # - def visit_rescue(node) - if node.exception.nil? - hints << Hint.new( - line: node.location.start_line - 1, - character: node.location.start_column + "rescue".length, - label: " StandardError" - ) - end - - super - end - - # Adds parentheses around unary statements using the - operator that are - # contained within Binary nodes. 
For example, - # - # -a + b - # - # becomes - # - # ₍-a₎ + b - # - def visit_unary(node) - if stack[-2].is_a?(Binary) && (node.operator == "-") - parentheses(node.location) - end - - super - end - - private - - def parentheses(location) - hints << Hint.new( - line: location.start_line - 1, - character: location.start_column, - label: "₍" - ) - - hints << Hint.new( - line: location.end_line - 1, - character: location.end_column, - label: "₎" - ) - end - end - end -end diff --git a/lib/syntax_tree/match_visitor.rb b/lib/syntax_tree/match_visitor.rb new file mode 100644 index 00000000..ca5bf234 --- /dev/null +++ b/lib/syntax_tree/match_visitor.rb @@ -0,0 +1,120 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor transforms the AST into a Ruby pattern matching expression that + # would match correctly against the AST. + class MatchVisitor < FieldVisitor + attr_reader :q + + def initialize(q) + @q = q + end + + def visit(node) + case node + when Node + super + when String + # pp will split up a string on newlines and concat them together using a + # "+" operator. This breaks the pattern matching expression. So instead + # we're going to check here for strings and manually put the entire + # value into the output buffer. + q.text(node.inspect) + else + node.pretty_print(q) + end + end + + private + + def comments(node) + return if node.comments.empty? + + q.nest(0) do + q.text("comments: [") + q.indent do + q.breakable("") + q.seplist(node.comments) { |comment| visit(comment) } + end + q.breakable("") + q.text("]") + end + end + + def field(name, value) + q.nest(0) do + q.text(name) + q.text(": ") + visit(value) + end + end + + def list(name, values) + q.group do + q.text(name) + q.text(": [") + q.indent do + q.breakable("") + q.seplist(values) { |value| visit(value) } + end + q.breakable("") + q.text("]") + end + end + + def node(node, _type) + items = [] + q.with_target(items) { yield } + + if items.empty? 
+ q.text(node.class.name) + return + end + + q.group do + q.text(node.class.name) + q.text("[") + q.indent do + q.breakable("") + q.seplist(items) { |item| q.target << item } + end + q.breakable("") + q.text("]") + end + end + + def pairs(name, values) + q.group do + q.text(name) + q.text(": [") + q.indent do + q.breakable("") + q.seplist(values) do |(key, value)| + q.group do + q.text("[") + q.indent do + q.breakable("") + visit(key) + q.text(",") + q.breakable + visit(value || nil) + end + q.breakable("") + q.text("]") + end + end + end + q.breakable("") + q.text("]") + end + end + + def text(name, value) + q.nest(0) do + q.text(name) + q.text(": ") + value.pretty_print(q) + end + end + end +end diff --git a/lib/syntax_tree/mermaid.rb b/lib/syntax_tree/mermaid.rb new file mode 100644 index 00000000..68ea4734 --- /dev/null +++ b/lib/syntax_tree/mermaid.rb @@ -0,0 +1,177 @@ +# frozen_string_literal: true + +require "cgi" +require "stringio" + +module SyntaxTree + # This module is responsible for rendering mermaid (https://mermaid.js.org/) + # flow charts. + module Mermaid + # This is the main class that handles rendering a flowchart. It keeps track + # of its nodes and links and renders them according to the mermaid syntax. + class FlowChart + attr_reader :output, :prefix, :nodes, :links + + def initialize + @output = StringIO.new + @output.puts("flowchart TD") + @prefix = " " + + @nodes = {} + @links = [] + end + + # Retrieve a node that has already been added to the flowchart by its id. + def fetch(id) + nodes.fetch(id) + end + + # Add a link to the flowchart between two nodes with an optional label. + def link(from, to, label = nil, type: :directed, color: nil) + link = Link.new(from, to, label, type, color) + links << link + + output.puts("#{prefix}#{link.render}") + link + end + + # Add a node to the flowchart with an optional label. 
+ def node(id, label = " ", shape: :rectangle) + node = Node.new(id, label, shape) + nodes[id] = node + + output.puts("#{prefix}#{nodes[id].render}") + node + end + + # Add a subgraph to the flowchart. Within the given block, all of the + # nodes will be rendered within the subgraph. + def subgraph(label) + output.puts("#{prefix}subgraph #{Mermaid.escape(label)}") + + previous = prefix + @prefix = "#{prefix} " + + begin + yield + ensure + @prefix = previous + output.puts("#{prefix}end") + end + end + + # Return the rendered flowchart. + def render + links.each_with_index do |link, index| + if link.color + output.puts("#{prefix}linkStyle #{index} stroke:#{link.color}") + end + end + + output.string + end + end + + # This class represents a link between two nodes in a flowchart. It is not + # meant to be interacted with directly, but rather used as a data structure + # by the FlowChart class. + class Link + TYPES = %i[directed dotted].freeze + COLORS = %i[green red].freeze + + attr_reader :from, :to, :label, :type, :color + + def initialize(from, to, label, type, color) + raise unless TYPES.include?(type) + raise if color && !COLORS.include?(color) + + @from = from + @to = to + @label = label + @type = type + @color = color + end + + def render + left_side, right_side, full_side = sides + + if label + escaped = Mermaid.escape(label) + "#{from.id} #{left_side} #{escaped} #{right_side} #{to.id}" + else + "#{from.id} #{full_side} #{to.id}" + end + end + + private + + def sides + case type + when :directed + %w[-- --> -->] + when :dotted + %w[-. .-> -.->] + end + end + end + + # This class represents a node in a flowchart. Unlike the Link class, it can + # be used directly. It is the return value of the #node method, and is meant + # to be passed around to #link methods to create links between nodes. 
+ class Node + SHAPES = %i[circle rectangle rounded stadium].freeze + + attr_reader :id, :label, :shape + + def initialize(id, label, shape) + raise unless SHAPES.include?(shape) + + @id = id + @label = label + @shape = shape + end + + def render + left_bound, right_bound = bounds + "#{id}#{left_bound}#{Mermaid.escape(label)}#{right_bound}" + end + + private + + def bounds + case shape + when :circle + %w[(( ))] + when :rectangle + ["[", "]"] + when :rounded + %w[( )] + when :stadium + ["([", "])"] + end + end + end + + class << self + # Escape a label to be used in the mermaid syntax. This is used to escape + # HTML entities such that they render properly within the quotes. + def escape(label) + "\"#{CGI.escapeHTML(label)}\"" + end + + # Create a new flowchart. If a block is given, it will be yielded to and + # the flowchart will be rendered. Otherwise, the flowchart will be + # returned. + def flowchart + flowchart = FlowChart.new + + if block_given? + yield flowchart + flowchart.render + else + flowchart + end + end + end + end +end diff --git a/lib/syntax_tree/mermaid_visitor.rb b/lib/syntax_tree/mermaid_visitor.rb new file mode 100644 index 00000000..fc9f6706 --- /dev/null +++ b/lib/syntax_tree/mermaid_visitor.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor transforms the AST into a mermaid flow chart. 
+ class MermaidVisitor < FieldVisitor + attr_reader :flowchart, :target + + def initialize + @flowchart = Mermaid.flowchart + @target = nil + end + + def visit_program(node) + super + flowchart.render + end + + private + + def comments(node) + # Ignore + end + + def field(name, value) + case value + when nil + # skip + when Node + flowchart.link(target, visit(value), name) + else + to = + flowchart.node("#{target.id}_#{name}", value.inspect, shape: :stadium) + flowchart.link(target, to, name) + end + end + + def list(name, values) + values.each_with_index do |value, index| + field("#{name}[#{index}]", value) + end + end + + def node(node, type) + previous_target = target + + begin + @target = flowchart.node("node_#{node.object_id}", type) + yield + @target + ensure + @target = previous_target + end + end + + def pairs(name, values) + values.each_with_index do |(key, value), index| + to = flowchart.node("#{target.id}_#{name}_#{index}", shape: :circle) + + flowchart.link(target, to, "#{name}[#{index}]") + flowchart.link(to, visit(key), "[0]") + flowchart.link(to, visit(value), "[1]") if value + end + end + + def text(name, value) + field(name, value) + end + end +end diff --git a/lib/syntax_tree/visitor/mutation_visitor.rb b/lib/syntax_tree/mutation_visitor.rb similarity index 94% rename from lib/syntax_tree/visitor/mutation_visitor.rb rename to lib/syntax_tree/mutation_visitor.rb index 65f8c5ba..0b4b9357 100644 --- a/lib/syntax_tree/visitor/mutation_visitor.rb +++ b/lib/syntax_tree/mutation_visitor.rb @@ -1,39 +1,39 @@ # frozen_string_literal: true module SyntaxTree - class Visitor - # This visitor walks through the tree and copies each node as it is being - # visited. This is useful for mutating the tree before it is formatted. - class MutationVisitor < BasicVisitor - attr_reader :mutations + # This visitor walks through the tree and copies each node as it is being + # visited. This is useful for mutating the tree before it is formatted. 
+ class MutationVisitor < BasicVisitor + attr_reader :mutations - def initialize - @mutations = [] - end - - # Create a new mutation based on the given query that will mutate the node - # using the given block. The block should return a new node that will take - # the place of the given node in the tree. These blocks frequently make - # use of the `copy` method on nodes to create a new node with the same - # properties as the original node. - def mutate(query, &block) - mutations << [Pattern.new(query).compile, block] - end + def initialize + @mutations = [] + end - # This is the base visit method for each node in the tree. It first - # creates a copy of the node using the visit_* methods defined below. Then - # it checks each mutation in sequence and calls it if it finds a match. - def visit(node) - return unless node - result = node.accept(self) + # Create a new mutation based on the given query that will mutate the node + # using the given block. The block should return a new node that will take + # the place of the given node in the tree. These blocks frequently make use + # of the `copy` method on nodes to create a new node with the same + # properties as the original node. + def mutate(query, &block) + mutations << [Pattern.new(query).compile, block] + end - mutations.each do |(pattern, mutation)| - result = mutation.call(result) if pattern.call(result) - end + # This is the base visit method for each node in the tree. It first creates + # a copy of the node using the visit_* methods defined below. Then it checks + # each mutation in sequence and calls it if it finds a match. + def visit(node) + return unless node + result = node.accept(self) - result + mutations.each do |(pattern, mutation)| + result = mutation.call(result) if pattern.call(result) end + result + end + + visit_methods do # Visit a BEGINBlock node. 
def visit_BEGIN(node) node.copy( diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 53fb3905..96241bb1 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -126,18 +126,28 @@ def format(q) raise NotImplementedError end + def start_char + location.start_char + end + + def end_char + location.end_char + end + def pretty_print(q) - visitor = Visitor::PrettyPrintVisitor.new(q) - visitor.visit(self) + accept(PrettyPrintVisitor.new(q)) end def to_json(*opts) - visitor = Visitor::JSONVisitor.new - visitor.visit(self).to_json(*opts) + accept(JSONVisitor.new).to_json(*opts) + end + + def to_mermaid + accept(MermaidVisitor.new) end def construct_keys - PrettierPrint.format(+"") { |q| Visitor::MatchVisitor.new(q).visit(self) } + PrettierPrint.format(+"") { |q| accept(MatchVisitor.new(q)) } end end @@ -278,7 +288,7 @@ def format(q) q.text(value) else q.text(q.quote) - q.text(value[1] == "\"" ? "\\\"" : value[1]) + q.text(value[1] == q.quote ? "\\#{q.quote}" : value[1]) q.text(q.quote) end end @@ -555,7 +565,7 @@ def var_alias? # collection[] # class ARef < Node - # [untyped] the value being indexed + # [Node] the value being indexed attr_reader :collection # [nil | Args] the value being passed within the brackets @@ -633,7 +643,7 @@ def ===(other) # collection[index] = value # class ARefField < Node - # [untyped] the value being indexed + # [Node] the value being indexed attr_reader :collection # [nil | Args] the value being passed within the brackets @@ -775,12 +785,17 @@ def ===(other) other.is_a?(ArgParen) && arguments === other.arguments end + def arity + arguments&.arity || 0 + end + private def trailing_comma? + arguments = self.arguments return false unless arguments.is_a?(Args) - parts = arguments.parts + parts = arguments.parts if parts.last.is_a?(ArgBlock) # If the last argument is a block, then we can't put a trailing comma # after it without resulting in a syntax error. @@ -804,7 +819,7 @@ def trailing_comma? 
# method(first, second, third) # class Args < Node - # [Array[ untyped ]] the arguments that this node wraps + # [Array[ Node ]] the arguments that this node wraps attr_reader :parts # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -848,6 +863,21 @@ def format(q) def ===(other) other.is_a?(Args) && ArrayMatch.call(parts, other.parts) end + + def arity + parts.sum do |part| + case part + when ArgStar, ArgsForward + Float::INFINITY + when BareAssocHash + part.assocs.sum do |assoc| + assoc.is_a?(AssocSplat) ? Float::INFINITY : 1 + end + else + 1 + end + end + end end # ArgBlock represents using a block operator on an expression. @@ -855,7 +885,7 @@ def ===(other) # method(&expression) # class ArgBlock < Node - # [nil | untyped] the expression being turned into a block + # [nil | Node] the expression being turned into a block attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -907,7 +937,7 @@ def ===(other) # method(*arguments) # class ArgStar < Node - # [nil | untyped] the expression being splatted + # [nil | Node] the expression being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1008,6 +1038,10 @@ def format(q) def ===(other) other.is_a?(ArgsForward) end + + def arity + Float::INFINITY + end end # ArrayLiteral represents an array literal, which can optionally contain @@ -1080,58 +1114,6 @@ def format(q) end end - # Formats an array that contains only a list of variable references. To make - # things simpler, if there are a bunch, we format them all using the "fill" - # algorithm as opposed to breaking them into a ton of lines. For example, - # - # [foo, bar, baz] - # - # instead of becoming: - # - # [ - # foo, - # bar, - # baz - # ] - # - # would instead become: - # - # [ - # foo, bar, - # baz - # ] - # - # provided the line length was hit between `bar` and `baz`. - class VarRefsFormatter - # The separator for the fill algorithm. 
- class Separator - def call(q) - q.text(",") - q.fill_breakable - end - end - - # [Args] the contents of the array - attr_reader :contents - - def initialize(contents) - @contents = contents - end - - def format(q) - q.text("[") - q.group do - q.indent do - q.breakable_empty - q.seplist(contents.parts, Separator.new) { |part| q.format(part) } - q.if_break { q.text(",") } if q.trailing_comma? - end - q.breakable_empty - end - q.text("]") - end - end - # This is a special formatter used if the array literal contains no values # but _does_ contain comments. In this case we do some special formatting to # make sure the comments gets indented properly. @@ -1158,7 +1140,8 @@ def format(q) end end - # [LBracket] the bracket that opens this array + # [nil | LBracket | QSymbolsBeg | QWordsBeg | SymbolsBeg | WordsBeg] the + # bracket that opens this array attr_reader :lbracket # [nil | Args] the contents of the array @@ -1206,19 +1189,20 @@ def deconstruct_keys(_keys) end def format(q) - if qwords? - QWordsFormatter.new(contents).format(q) - return - end + lbracket = self.lbracket + contents = self.contents - if qsymbols? - QSymbolsFormatter.new(contents).format(q) - return - end + if lbracket.is_a?(LBracket) && lbracket.comments.empty? && contents && + contents.comments.empty? && contents.parts.length > 1 + if qwords? + QWordsFormatter.new(contents).format(q) + return + end - if var_refs?(q) - VarRefsFormatter.new(contents).format(q) - return + if qsymbols? + QSymbolsFormatter.new(contents).format(q) + return + end end if empty_with_comments? @@ -1250,39 +1234,24 @@ def ===(other) private def qwords? - lbracket.comments.empty? && contents && contents.comments.empty? && - contents.parts.length > 1 && - contents.parts.all? do |part| - case part - when StringLiteral - part.comments.empty? 
&& part.parts.length == 1 && - part.parts.first.is_a?(TStringContent) && - !part.parts.first.value.match?(/[\s\[\]\\]/) - when CHAR - !part.value.match?(/[\[\]\\]/) - else - false - end + contents.parts.all? do |part| + case part + when StringLiteral + part.comments.empty? && part.parts.length == 1 && + part.parts.first.is_a?(TStringContent) && + !part.parts.first.value.match?(/[\s\[\]\\]/) + when CHAR + !part.value.match?(/[\[\]\\]/) + else + false end + end end def qsymbols? - lbracket.comments.empty? && contents && contents.comments.empty? && - contents.parts.length > 1 && - contents.parts.all? do |part| - part.is_a?(SymbolLiteral) && part.comments.empty? - end - end - - def var_refs?(q) - lbracket.comments.empty? && contents && contents.comments.empty? && - contents.parts.all? do |part| - part.is_a?(VarRef) && part.comments.empty? - end && - ( - contents.parts.sum { |part| part.value.value.length + 2 } > - q.maxwidth * 2 - ) + contents.parts.all? do |part| + part.is_a?(SymbolLiteral) && part.comments.empty? 
+ end end # If we have an empty array that contains only comments, then we're going @@ -1330,10 +1299,10 @@ def format(q) end end - # [nil | VarRef] the optional constant wrapper + # [nil | VarRef | ConstPathRef] the optional constant wrapper attr_reader :constant - # [Array[ untyped ]] the regular positional arguments that this array + # [Array[ Node ]] the regular positional arguments that this array # pattern is matching against attr_reader :requireds @@ -1341,7 +1310,7 @@ def format(q) # positional arguments attr_reader :rest - # [Array[ untyped ]] the list of positional arguments occurring after the + # [Array[ Node ]] the list of positional arguments occurring after the # optional star if there is one attr_reader :posts @@ -1451,7 +1420,7 @@ class Assign < Node # to assign the result of the expression to attr_reader :target - # [untyped] the expression to be assigned + # [Node] the expression to be assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1526,10 +1495,10 @@ def skip_indent? # # In the above example, the would be two Assoc nodes. class Assoc < Node - # [untyped] the key of this pair + # [Node] the key of this pair attr_reader :key - # [untyped] the value of this pair + # [nil | Node] the value of this pair attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1583,7 +1552,7 @@ def ===(other) private def format_contents(q) - q.parent.format_key(q, key) + (q.parent || HashKeyFormatter::Identity.new).format_key(q, key) return unless value if key.comments.empty? 
&& AssignFormatting.skip_indent?(value) @@ -1604,7 +1573,7 @@ def format_contents(q) # { **pairs } # class AssocSplat < Node - # [nil | untyped] the expression that is being splatted + # [nil | Node] the expression that is being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1800,30 +1769,74 @@ def format_key(q, key) end end - def self.for(container) - labels = - container.assocs.all? do |assoc| - next true if assoc.is_a?(AssocSplat) - - case assoc.key - when Label - true - when SymbolLiteral - # When attempting to convert a hash rocket into a hash label, - # you need to take care because only certain patterns are - # allowed. Ruby source says that they have to match keyword - # arguments to methods, but don't specify what that is. After - # some experimentation, it looks like it's: - value = assoc.key.value.value - value.match?(/^[_A-Za-z]/) && !value.end_with?("=") - when DynaSymbol - true + # When formatting a single assoc node without the context of the parent + # hash, this formatter is used. It uses whatever is present in the node, + # because there is nothing to be consistent with. + class Identity + def format_key(q, key) + if key.is_a?(Label) + q.format(key) + else + q.format(key) + q.text(" =>") + end + end + end + + class << self + def for(container) + (assocs = container.assocs).each_with_index do |assoc, index| + if assoc.is_a?(AssocSplat) + # Splat nodes do not impact the formatting choice. + elsif assoc.value.nil? + # If the value is nil, then it has been omitted. In this case we + # have to match the existing formatting because standardizing would + # potentially break the code. For example: + # + # { first:, "second" => "value" } + # + return Identity.new else - false + # Otherwise, we need to check the type of the key. If it's a label + # or dynamic symbol, we can use labels. If it's a symbol literal + # then it needs to match a certain pattern to be used as a label. 
If + # it's anything else, then we need to use hash rockets. + case assoc.key + when Label, DynaSymbol + # Here labels can be used. + when SymbolLiteral + # When attempting to convert a hash rocket into a hash label, + # you need to take care because only certain patterns are + # allowed. Ruby source says that they have to match keyword + # arguments to methods, but don't specify what that is. After + # some experimentation, it looks like it's: + value = assoc.key.value.value + + if !value.match?(/^[_A-Za-z]/) || value.end_with?("=") + if omitted_value?(assocs[(index + 1)..]) + return Identity.new + else + return Rockets.new + end + end + else + if omitted_value?(assocs[(index + 1)..]) + return Identity.new + else + return Rockets.new + end + end end end - (labels ? Labels : Rockets).new + Labels.new + end + + private + + def omitted_value?(assocs) + assocs.any? { |assoc| !assoc.is_a?(AssocSplat) && assoc.value.nil? } + end end end @@ -1880,7 +1893,15 @@ def ===(other) end def format_key(q, key) - (@key_formatter ||= HashKeyFormatter.for(self)).format_key(q, key) + @key_formatter ||= + case q.parents.take(3).last + when Break, Next, ReturnNode + HashKeyFormatter::Identity.new + else + HashKeyFormatter.for(self) + end + + @key_formatter.format_key(q, key) end end @@ -1954,7 +1975,7 @@ def ===(other) # end # class PinnedBegin < Node - # [untyped] the expression being pinned + # [Node] the expression being pinned attr_reader :statement # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -2035,13 +2056,13 @@ def name } end - # [untyped] the left-hand side of the expression + # [Node] the left-hand side of the expression attr_reader :left # [Symbol] the operator used between the two expressions attr_reader :operator - # [untyped] the right-hand side of the expression + # [Node] the right-hand side of the expression attr_reader :right # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -2089,16 +2110,22 @@ def deconstruct_keys(_keys) end 
def format(q) + left = self.left power = operator == :** q.group do q.group { q.format(left) } q.text(" ") unless power - if operator == :<< - q.text("<< ") - q.format(right) - else + if operator != :<< + q.group do + q.text(operator.name) + q.indent do + power ? q.breakable_empty : q.breakable_space + q.format(right) + end + end + elsif left.is_a?(Binary) && left.operator == :<< q.group do q.text(operator.name) q.indent do @@ -2106,6 +2133,9 @@ def format(q) q.format(right) end end + else + q.text("<< ") + q.format(right) end end end @@ -2195,6 +2225,14 @@ def ===(other) other.is_a?(BlockVar) && params === other.params && ArrayMatch.call(locals, other.locals) end + + # When a single required parameter is declared for a block, it gets + # automatically expanded if the values being yielded into it are an array. + def arg0? + params.requireds.length == 1 && params.optionals.empty? && + params.rest.nil? && params.posts.empty? && params.keywords.empty? && + params.keyword_rest.nil? && params.block.nil? + end end # BlockArg represents declaring a block parameter on a method definition. @@ -2288,7 +2326,9 @@ def initialize( @comments = [] end - def bind(start_char, start_column, end_char, end_column) + def bind(parser, start_char, start_column, end_char, end_column) + rescue_clause = self.rescue_clause + @location = Location.new( start_line: location.start_line, @@ -2302,6 +2342,7 @@ def bind(start_char, start_column, end_char, end_column) # Here we're going to determine the bounds for the statements consequent = rescue_clause || else_clause || ensure_clause statements.bind( + parser, start_char, start_column, consequent ? consequent.location.start_char : end_char, @@ -2311,6 +2352,7 @@ def bind(start_char, start_column, end_char, end_column) # Next we're going to determine the rescue clause if there is one if rescue_clause consequent = else_clause || ensure_clause + rescue_clause.bind_end( consequent ? consequent.location.start_char : end_char, consequent ? 
consequent.location.start_column : end_column @@ -2692,7 +2734,7 @@ def format(q) # Of course there are a lot of caveats to that, including trailing operators # when necessary, where comments are places, how blocks are aligned, etc. class CallChainFormatter - # [Call | MethodAddBlock] the top of the call chain + # [CallNode | MethodAddBlock] the top of the call chain attr_reader :node def initialize(node) @@ -2716,7 +2758,7 @@ def format(q) children << receiver end when MethodAddBlock - if receiver.call.is_a?(CallNode) && !receiver.call.receiver.nil? + if (call = receiver.call).is_a?(CallNode) && !call.receiver.nil? children << receiver else break @@ -2725,8 +2767,8 @@ def format(q) break end when MethodAddBlock - if child.call.is_a?(CallNode) && !child.call.receiver.nil? - children << child.call + if (call = child.call).is_a?(CallNode) && !call.receiver.nil? + children << call else break end @@ -2748,8 +2790,8 @@ def format(q) # of just Statements nodes. parent = parents[3] if parent.is_a?(BlockNode) && parent.keywords? - if parent.is_a?(MethodAddBlock) && parent.call.is_a?(CallNode) && - parent.call.message.value == "sig" + if parent.is_a?(MethodAddBlock) && + (call = parent.call).is_a?(CallNode) && call.message.value == "sig" threshold = 2 end end @@ -2794,10 +2836,10 @@ def format_chain(q, children) while (child = children.pop) if child.is_a?(CallNode) - if child.receiver.is_a?(CallNode) && - (child.receiver.message != :call) && - (child.receiver.message.value == "where") && - (child.message.value == "not") + if (receiver = child.receiver).is_a?(CallNode) && + (receiver.message != :call) && + (receiver.message.value == "where") && + (child.message != :call && child.message.value == "not") # This is very specialized behavior wherein we group # .where.not calls together because it looks better. 
For more # information, see @@ -2821,8 +2863,11 @@ def format_chain(q, children) # If the parent call node has a comment on the message then we need # to print the operator trailing in order to keep it working. last_child = children.last - if last_child.is_a?(CallNode) && last_child.message.comments.any? && - last_child.operator + if last_child.is_a?(CallNode) && last_child.message != :call && + ( + (last_child.message.comments.any? && last_child.operator) || + (last_child.operator && last_child.operator.comments.any?) + ) q.format(CallOperatorFormatter.new(last_child.operator)) skip_operator = true else @@ -2853,7 +2898,8 @@ def self.chained?(node) when CallNode !node.receiver.nil? when MethodAddBlock - node.call.is_a?(CallNode) && !node.call.receiver.nil? + call = node.call + call.is_a?(CallNode) && !call.receiver.nil? else false end @@ -2913,7 +2959,7 @@ def format_child( # receiver.message # class CallNode < Node - # [nil | untyped] the receiver of the method call + # [nil | Node] the receiver of the method call attr_reader :receiver # [nil | :"::" | Op | Period] the operator being used to send the message @@ -3001,16 +3047,25 @@ def format(q) else q.format(message) - if arguments.is_a?(ArgParen) && arguments.arguments.nil? && - !message.is_a?(Const) - # If you're using an explicit set of parentheses on something that - # looks like a constant, then we need to match that in order to - # maintain valid Ruby. For example, you could do something like Foo(), - # on which we would need to keep the parentheses to make it look like - # a method call. - else - q.format(arguments) - end + # Note that this explicitly leaves parentheses in place even if they are + # empty. There are two reasons we would need to do this. The first is if + # we're calling something that looks like a constant, as in: + # + # Foo() + # + # In this case if we remove the parentheses then this becomes a constant + # reference and not a method call. 
The second is if we're calling a + # method that is the same name as a local variable that is in scope, as + # in: + # + # foo = foo() + # + # In this case we have to keep the parentheses or else it treats this + # like assigning nil to the local variable. Note that we could attempt + # to be smarter about this by tracking the local variables that are in + # scope, but for now it's simpler and more efficient to just leave the + # parentheses in place. + q.format(arguments) if arguments end end @@ -3059,6 +3114,10 @@ def format_contents(q) end end end + + def arity + arguments&.arity || 0 + end end # Case represents the beginning of a case chain. @@ -3076,7 +3135,7 @@ class Case < Node # [Kw] the keyword that opens this expression attr_reader :keyword - # [nil | untyped] optional value being switched on + # [nil | Node] optional value being switched on attr_reader :value # [In | When] the next clause in the chain @@ -3155,14 +3214,14 @@ def ===(other) # value => pattern # class RAssign < Node - # [untyped] the left-hand expression + # [Node] the left-hand expression attr_reader :value # [Kw | Op] the operator being used to match against the pattern, which is # either => or in attr_reader :operator - # [untyped] the pattern on the right-hand side of the expression + # [Node] the pattern on the right-hand side of the expression attr_reader :pattern # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3273,7 +3332,7 @@ class ClassDeclaration < Node # defined attr_reader :constant - # [nil | untyped] the optional superclass declaration + # [nil | Node] the optional superclass declaration attr_reader :superclass # [BodyStmt] the expressions to execute within the context of the class @@ -3411,7 +3470,7 @@ class Command < Node # [Args] the arguments being sent with the message attr_reader :arguments - # [nil | Block] the optional block being passed to the method + # [nil | BlockNode] the optional block being passed to the method attr_reader :block # [Array[ 
Comment | EmbDoc ]] the comments attached to this node @@ -3472,6 +3531,10 @@ def ===(other) arguments === other.arguments && block === other.block end + def arity + arguments.arity + end + private def align(q, node, &block) @@ -3513,19 +3576,19 @@ def align(q, node, &block) # object.method argument # class CommandCall < Node - # [untyped] the receiver of the message + # [nil | Node] the receiver of the message attr_reader :receiver - # [:"::" | Op | Period] the operator used to send the message + # [nil | :"::" | Op | Period] the operator used to send the message attr_reader :operator - # [Const | Ident | Op] the message being send + # [:call | Const | Ident | Op] the message being send attr_reader :message - # [nil | Args] the arguments going along with the message + # [nil | Args | ArgParen] the arguments going along with the message attr_reader :arguments - # [nil | Block] the block associated with this method call + # [nil | BlockNode] the block associated with this method call attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -3593,6 +3656,10 @@ def deconstruct_keys(_keys) end def format(q) + message = self.message + arguments = self.arguments + block = self.block + q.group do doc = q.nest(0) do @@ -3601,7 +3668,7 @@ def format(q) # If there are leading comments on the message then we know we have # a newline in the source that is forcing these things apart. In # this case we will have to use a trailing operator. - if message.comments.any?(&:leading?) + if message != :call && message.comments.any?(&:leading?) 
q.format(CallOperatorFormatter.new(operator), stackable: false) q.indent do q.breakable_empty @@ -3637,6 +3704,10 @@ def ===(other) arguments === other.arguments && block === other.block end + def arity + arguments&.arity || 0 + end + private def argument_alignment(q, doc) @@ -3807,7 +3878,7 @@ def ===(other) # object::Const = value # class ConstPathField < Node - # [untyped] the source of the constant + # [Node] the source of the constant attr_reader :parent # [Const] the constant itself @@ -3871,7 +3942,7 @@ def ===(other) # object::Const # class ConstPathRef < Node - # [untyped] the source of the constant + # [Node] the source of the constant attr_reader :parent # [Const] the constant itself @@ -4040,7 +4111,7 @@ def ===(other) # def object.method(param) result end # class DefNode < Node - # [nil | untyped] the target where the method is being defined + # [nil | Node] the target where the method is being defined attr_reader :target # [nil | Op | Period] the operator being used to declare the method @@ -4052,7 +4123,7 @@ class DefNode < Node # [nil | Params | Paren] the parameter declaration for the method attr_reader :params - # [BodyStmt | untyped] the expressions to be executed by the method + # [BodyStmt | Node] the expressions to be executed by the method attr_reader :bodystmt # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4113,9 +4184,13 @@ def deconstruct_keys(_keys) end def format(q) + params = self.params + bodystmt = self.bodystmt + q.group do q.group do - q.text("def ") + q.text("def") + q.text(" ") if target || name.comments.empty? if target q.format(target) @@ -4166,6 +4241,19 @@ def ===(other) def endless? !bodystmt.is_a?(BodyStmt) end + + def arity + params = self.params + + case params + when Params + params.arity + when Paren + params.contents.arity + else + 0..0 + end + end end # Defined represents the use of the +defined?+ operator. It can be used with @@ -4174,7 +4262,7 @@ def endless? 
# defined?(variable) # class Defined < Node - # [untyped] the value being sent to the keyword + # [Node] the value being sent to the keyword attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4331,7 +4419,7 @@ def format(q) # are no parentheses around the arguments to that command, so we need to # break the block. case q.parent - when Command, CommandCall + when nil, Command, CommandCall q.break_parent format_break(q, break_opening, break_closing) return @@ -4353,6 +4441,15 @@ def keywords? opening.is_a?(Kw) end + def arity + case block_var + when BlockVar + block_var.params.arity + else + 0..0 + end + end + private # If this is nested anywhere inside certain nodes, then we can't change @@ -4376,7 +4473,7 @@ def unchangeable_bounds?(q) # If we're a sibling of a control-flow keyword, then we're going to have to # use the do..end bounds. def forced_do_end_bounds?(q) - case q.parent.call + case q.parent&.call when Break, Next, ReturnNode, Super true else @@ -4456,13 +4553,13 @@ def format_flat(q, flat_opening, flat_closing) # # One of the sides of the expression may be nil, but not both. 
class RangeNode < Node - # [nil | untyped] the left side of the expression + # [nil | Node] the left side of the expression attr_reader :left # [Op] the operator used for this range attr_reader :operator - # [nil | untyped] the right side of the expression + # [nil | Node] the right side of the expression attr_reader :right # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4583,7 +4680,7 @@ class DynaSymbol < Node # dynamic symbol attr_reader :parts - # [String] the quote used to delimit the dynamic symbol + # [nil | String] the quote used to delimit the dynamic symbol attr_reader :quote # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -4781,7 +4878,7 @@ def ===(other) # end # class Elsif < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -4877,6 +4974,25 @@ class EmbDoc < Node def initialize(value:, location:) @value = value @location = location + + @leading = false + @trailing = false + end + + def leading! + @leading = true + end + + def leading? + @leading + end + + def trailing! + @trailing = true + end + + def trailing? + @trailing end def inline? @@ -4913,7 +5029,13 @@ def deconstruct_keys(_keys) end def format(q) - q.trim + if (q.parent.is_a?(DefNode) && q.parent.endless?) 
|| + q.parent.is_a?(Statements) + q.trim + else + q.breakable_return + end + q.text(value) end @@ -5182,7 +5304,7 @@ def ===(other) # object.variable = value # class Field < Node - # [untyped] the parent object that owns the field being assigned + # [Node] the parent object that owns the field being assigned attr_reader :parent # [:"::" | Op | Period] the operator being used for the assignment @@ -5207,6 +5329,7 @@ def accept(visitor) end def child_nodes + operator = self.operator [parent, (operator if operator != :"::"), name] end @@ -5308,13 +5431,13 @@ def ===(other) # end # class FndPtn < Node - # [nil | untyped] the optional constant wrapper + # [nil | VarRef | ConstPathRef] the optional constant wrapper attr_reader :constant # [VarField] the splat on the left-hand side attr_reader :left - # [Array[ untyped ]] the list of positional expressions in the pattern that + # [Array[ Node ]] the list of positional expressions in the pattern that # are being matched attr_reader :values @@ -5410,7 +5533,7 @@ class For < Node # pull values out of the object being enumerated attr_reader :index - # [untyped] the object being enumerated in the loop + # [Node] the object being enumerated in the loop attr_reader :collection # [Statements] the statements to be executed @@ -5588,7 +5711,7 @@ def accept(visitor) end def child_nodes - [lbrace] + assocs + [lbrace].concat(assocs) end def copy(lbrace: nil, assocs: nil, location: nil) @@ -5680,7 +5803,7 @@ class Heredoc < Node # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(beginning:, ending: nil, dedent: 0, parts: [], location:) + def initialize(beginning:, location:, ending: nil, dedent: 0, parts: []) @beginning = beginning @ending = ending @dedent = dedent @@ -5889,7 +6012,7 @@ class KeywordFormatter # [Label] the keyword being used attr_reader :key - # [untyped] the optional value for the keyword + # [Node] the optional value for the keyword attr_reader :value def 
initialize(key, value) @@ -5902,7 +6025,7 @@ def comments end def format(q) - q.format(key) + HashKeyFormatter::Labels.new.format_key(q, key) if value q.text(" ") @@ -5930,11 +6053,11 @@ def format(q) end end - # [nil | untyped] the optional constant wrapper + # [nil | VarRef | ConstPathRef] the optional constant wrapper attr_reader :constant - # [Array[ [Label, untyped] ]] the set of tuples representing the keywords - # that should be matched against in the pattern + # [Array[ [DynaSymbol | Label, nil | Node] ]] the set of tuples + # representing the keywords that should be matched against in the pattern attr_reader :keywords # [nil | VarField] an optional parameter to gather up all remaining keywords @@ -6048,6 +6171,8 @@ def ===(other) private def format_contents(q, parts, nested) + keyword_rest = self.keyword_rest + q.group { q.seplist(parts) { |part| q.format(part, stackable: false) } } # If there isn't a constant, and there's a blank keyword_rest, then we @@ -6144,7 +6269,7 @@ def self.call(parent) module Ternaryable class << self def call(q, node) - return false if ENV["STREE_FAST_FORMAT"] + return false if ENV["STREE_FAST_FORMAT"] || q.disable_auto_ternary? # If this is a conditional inside of a parentheses as the only content, # then we don't want to transform it into a ternary. Presumably the user @@ -6160,7 +6285,7 @@ def call(q, node) # want to force it to not be a ternary, like if the predicate is an # assignment because it's hard to read. case node.predicate - when Assign, Command, CommandCall, MAssign, OpAssign + when Assign, Binary, Command, CommandCall, MAssign, OpAssign return false when Not return false unless node.predicate.parentheses? @@ -6183,10 +6308,10 @@ def call(q, node) # and default instead to breaking them into multiple lines. 
def ternaryable?(statement) case statement - when AliasNode, Assign, Break, Command, CommandCall, Heredoc, IfNode, - IfOp, Lambda, MAssign, Next, OpAssign, RescueMod, ReturnNode, - Super, Undef, UnlessNode, UntilNode, VoidStmt, WhileNode, - YieldNode, ZSuper + when AliasNode, Assign, Break, Command, CommandCall, Defined, Heredoc, + IfNode, IfOp, Lambda, MAssign, Next, OpAssign, RescueMod, + ReturnNode, Super, Undef, UnlessNode, UntilNode, VoidStmt, + WhileNode, YieldNode, ZSuper # This is a list of nodes that should not be allowed to be a part of a # ternary clause. false @@ -6359,7 +6484,7 @@ def contains_conditional? # end # class IfNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -6432,13 +6557,13 @@ def modifier? # predicate ? truthy : falsy # class IfOp < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate - # [untyped] the expression to be executed if the predicate is truthy + # [Node] the expression to be executed if the predicate is truthy attr_reader :truthy - # [untyped] the expression to be executed if the predicate is falsy + # [Node] the expression to be executed if the predicate is falsy attr_reader :falsy # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -6487,9 +6612,26 @@ def deconstruct_keys(_keys) def format(q) force_flat = [ - AliasNode, Assign, Break, Command, CommandCall, Heredoc, IfNode, IfOp, - Lambda, MAssign, Next, OpAssign, RescueMod, ReturnNode, Super, Undef, - UnlessNode, VoidStmt, YieldNode, ZSuper + AliasNode, + Assign, + Break, + Command, + CommandCall, + Heredoc, + IfNode, + IfOp, + Lambda, + MAssign, + Next, + OpAssign, + RescueMod, + ReturnNode, + Super, + Undef, + UnlessNode, + VoidStmt, + YieldNode, + ZSuper ] if q.parent.is_a?(Paren) || force_flat.include?(truthy.class) || @@ -6605,7 +6747,7 @@ def ===(other) # end # class In < Node - # 
[untyped] the pattern to check against + # [Node] the pattern to check against attr_reader :pattern # [Statements] the expressions to execute if the pattern matched @@ -6660,10 +6802,13 @@ def deconstruct_keys(_keys) def format(q) keyword = "in " + pattern = self.pattern + consequent = self.consequent q.group do q.text(keyword) q.nest(keyword.length) { q.format(pattern) } + q.text(" then") if pattern.is_a?(RangeNode) && pattern.right.nil? unless statements.empty? q.indent do @@ -7061,6 +7206,8 @@ def deconstruct_keys(_keys) end def format(q) + params = self.params + q.text("->") q.group do if params.is_a?(Paren) @@ -7078,36 +7225,17 @@ def format(q) q.text(" ") q .if_break do - force_parens = - q.parents.any? do |node| - node.is_a?(Command) || node.is_a?(CommandCall) - end - - if force_parens - q.text("{") + q.text("do") - unless statements.empty? - q.indent do - q.breakable_space - q.format(statements) - end + unless statements.empty? + q.indent do q.breakable_space + q.format(statements) end - - q.text("}") - else - q.text("do") - - unless statements.empty? 
- q.indent do - q.breakable_space - q.format(statements) - end - end - - q.breakable_space - q.text("end") end + + q.breakable_space + q.text("end") end .if_flat do q.text("{") @@ -7388,7 +7516,7 @@ class MAssign < Node # [MLHS | MLHSParen] the target of the multiple assignment attr_reader :target - # [untyped] the value being assigned + # [Node] the value being assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7448,10 +7576,10 @@ def ===(other) # method {} # class MethodAddBlock < Node - # [Call | Command | CommandCall] the method call + # [ARef | CallNode | Command | CommandCall | Super | ZSuper] the method call attr_reader :call - # [Block] the block being sent with the method call + # [BlockNode] the block being sent with the method call attr_reader :block # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7523,8 +7651,12 @@ def format_contents(q) # first, second, third = value # class MLHS < Node - # Array[ARefField | ArgStar | Field | Ident | MLHSParen | VarField] the - # parts of the left-hand side of a multiple assignment + # [ + # Array[ + # ARefField | ArgStar | ConstPathField | Field | Ident | MLHSParen | + # TopConstField | VarField + # ] + # ] the parts of the left-hand side of a multiple assignment attr_reader :parts # [boolean] whether or not there is a trailing comma at the end of this @@ -7535,7 +7667,7 @@ class MLHS < Node # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(parts:, comma: false, location:) + def initialize(parts:, location:, comma: false) @parts = parts @comma = comma @location = location @@ -7596,7 +7728,7 @@ class MLHSParen < Node # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(contents:, comma: false, location:) + def initialize(contents:, location:, comma: false) @contents = contents @comma = comma @location = location @@ -7750,7 +7882,7 @@ def 
format_declaration(q) # values = first, second, third # class MRHS < Node - # Array[untyped] the parts that are being assigned + # [Array[Node]] the parts that are being assigned attr_reader :parts # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -7926,7 +8058,7 @@ class OpAssign < Node # [Op] the operator being used for the assignment attr_reader :operator - # [untyped] the expression to be assigned + # [Node] the expression to be assigned attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -8083,7 +8215,7 @@ class OptionalFormatter # [Ident] the name of the parameter attr_reader :name - # [untyped] the value of the parameter + # [Node] the value of the parameter attr_reader :value def initialize(name, value) @@ -8108,7 +8240,7 @@ class KeywordFormatter # [Ident] the name of the parameter attr_reader :name - # [nil | untyped] the value of the parameter + # [nil | Node] the value of the parameter attr_reader :value def initialize(name, value) @@ -8149,10 +8281,10 @@ def format(q) end end - # [Array[ Ident ]] any required parameters + # [Array[ Ident | MLHSParen ]] any required parameters attr_reader :requireds - # [Array[ [ Ident, untyped ] ]] any optional parameters and their default + # [Array[ [ Ident, Node ] ]] any optional parameters and their default # values attr_reader :optionals @@ -8160,15 +8292,16 @@ def format(q) # parameter attr_reader :rest - # [Array[ Ident ]] any positional parameters that exist after a rest - # parameter + # [Array[ Ident | MLHSParen ]] any positional parameters that exist after a + # rest parameter attr_reader :posts - # [Array[ [ Ident, nil | untyped ] ]] any keyword parameters and their + # [Array[ [ Label, nil | Node ] ]] any keyword parameters and their # optional default values attr_reader :keywords - # [nil | :nil | KwRestParam] the optional keyword rest parameter + # [nil | :nil | ArgsForward | KwRestParam] the optional keyword rest + # parameter attr_reader 
:keyword_rest # [nil | BlockArg] the optional block parameter @@ -8178,14 +8311,14 @@ def format(q) attr_reader :comments def initialize( + location:, requireds: [], optionals: [], rest: nil, posts: [], keywords: [], keyword_rest: nil, - block: nil, - location: + block: nil ) @requireds = requireds @optionals = optionals @@ -8212,6 +8345,8 @@ def accept(visitor) end def child_nodes + keyword_rest = self.keyword_rest + [ *requireds, *optionals.flatten(1), @@ -8266,16 +8401,19 @@ def deconstruct_keys(_keys) end def format(q) + rest = self.rest + keyword_rest = self.keyword_rest + parts = [ *requireds, *optionals.map { |(name, value)| OptionalFormatter.new(name, value) } ] parts << rest if rest && !rest.is_a?(ExcessedComma) - parts += [ - *posts, - *keywords.map { |(name, value)| KeywordFormatter.new(name, value) } - ] + parts.concat(posts) + parts.concat( + keywords.map { |(name, value)| KeywordFormatter.new(name, value) } + ) parts << KeywordRestFormatter.new(keyword_rest) if keyword_rest parts << block if block @@ -8316,6 +8454,29 @@ def ===(other) keyword_rest === other.keyword_rest && block === other.block end + # Returns a range representing the possible number of arguments accepted + # by this params node not including the block. For example: + # + # def foo(a, b = 1, c:, d: 2, &block) + # ... + # end + # + # has arity 2..4. + # + def arity + optional_keywords = keywords.count { |_label, value| value } + + lower_bound = + requireds.length + posts.length + keywords.length - optional_keywords + + upper_bound = + if keyword_rest.nil? && rest.nil? 
+ lower_bound + optionals.length + optional_keywords + end + + lower_bound..upper_bound + end + private def format_contents(q, parts) @@ -8334,7 +8495,7 @@ class Paren < Node # [LParen] the left parenthesis that opened this statement attr_reader :lparen - # [nil | untyped] the expression inside the parentheses + # [nil | Node] the expression inside the parentheses attr_reader :contents # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -8379,6 +8540,8 @@ def deconstruct_keys(_keys) end def format(q) + contents = self.contents + q.group do q.format(lparen) @@ -9183,7 +9346,7 @@ def ambiguous?(q) # end # class RescueEx < Node - # [untyped] the list of exceptions being rescued + # [nil | Node] the list of exceptions being rescued attr_reader :exceptions # [nil | Field | VarField] the expression being used to capture the raised @@ -9261,7 +9424,7 @@ class Rescue < Node # [Kw] the rescue keyword attr_reader :keyword - # [RescueEx] the exceptions being rescued + # [nil | RescueEx] the exceptions being rescued attr_reader :exception # [Statements] the expressions to evaluate when an error is rescued @@ -9293,11 +9456,11 @@ def bind_end(end_char, end_column) end_column: end_column ) - if consequent - consequent.bind_end(end_char, end_column) + if (next_node = consequent) + next_node.bind_end(end_char, end_column) statements.bind_end( - consequent.location.start_char, - consequent.location.start_column + next_node.location.start_char, + next_node.location.start_column ) else statements.bind_end(end_char, end_column) @@ -9381,10 +9544,10 @@ def ===(other) # expression rescue value # class RescueMod < Node - # [untyped] the expression to execute + # [Node] the expression to execute attr_reader :statement - # [untyped] the value to use if the executed expression raises an error + # [Node] the value to use if the executed expression raises an error attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -9643,7 +9806,7 @@ def 
===(other) # end # class SClass < Node - # [untyped] the target of the singleton class to enter + # [Node] the target of the singleton class to enter attr_reader :target # [BodyStmt] the expressions to be executed @@ -9717,23 +9880,19 @@ def ===(other) # propagate that onto void_stmt nodes inside the stmts in order to make sure # all comments get printed appropriately. class Statements < Node - # [SyntaxTree] the parser that is generating this node - attr_reader :parser - - # [Array[ untyped ]] the list of expressions contained within this node + # [Array[ Node ]] the list of expressions contained within this node attr_reader :body # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments - def initialize(parser, body:, location:) - @parser = parser + def initialize(body:, location:) @body = body @location = location @comments = [] end - def bind(start_char, start_column, end_char, end_column) + def bind(parser, start_char, start_column, end_char, end_column) @location = Location.new( start_line: location.start_line, @@ -9744,8 +9903,8 @@ def bind(start_char, start_column, end_char, end_column) end_column: end_column ) - if body[0].is_a?(VoidStmt) - location = body[0].location + if (void_stmt = body[0]).is_a?(VoidStmt) + location = void_stmt.location location = Location.new( start_line: location.start_line, @@ -9759,7 +9918,7 @@ def bind(start_char, start_column, end_char, end_column) body[0] = VoidStmt.new(location: location) end - attach_comments(start_char, end_char) + attach_comments(parser, start_char, end_char) end def bind_end(end_char, end_column) @@ -9791,7 +9950,6 @@ def child_nodes def copy(body: nil, location: nil) node = Statements.new( - parser, body: body || self.body, location: location || self.location ) @@ -9803,7 +9961,7 @@ def copy(body: nil, location: nil) alias deconstruct child_nodes def deconstruct_keys(_keys) - { parser: parser, body: body, location: location, comments: comments } + { body: body, location: 
location, comments: comments } end def format(q) @@ -9863,7 +10021,7 @@ def ===(other) # As efficiently as possible, gather up all of the comments that have been # found while this statements list was being parsed and add them into the # body. - def attach_comments(start_char, end_char) + def attach_comments(parser, start_char, end_char) parser_comments = parser.comments comment_index = 0 @@ -9910,9 +10068,13 @@ class StringContent < Node # string attr_reader :parts + # [Array[ Comment | EmbDoc ]] the comments attached to this node + attr_reader :comments + def initialize(parts:, location:) @parts = parts @location = location + @comments = [] end def accept(visitor) @@ -9939,6 +10101,33 @@ def deconstruct_keys(_keys) def ===(other) other.is_a?(StringContent) && ArrayMatch.call(parts, other.parts) end + + def format(q) + q.text(q.quote) + q.group do + parts.each do |part| + if part.is_a?(TStringContent) + value = Quotes.normalize(part.value, q.quote) + first = true + + value.each_line(chomp: true) do |line| + if first + first = false + else + q.breakable_return + end + + q.text(line) + end + + q.breakable_return if value.end_with?("\n") + else + q.format(part) + end + end + end + q.text(q.quote) + end end # StringConcat represents concatenating two strings together using a backward @@ -9948,7 +10137,8 @@ def ===(other) # "second" # class StringConcat < Node - # [StringConcat | StringLiteral] the left side of the concatenation + # [Heredoc | StringConcat | StringLiteral] the left side of the + # concatenation attr_reader :left # [StringLiteral] the right side of the concatenation @@ -10145,7 +10335,7 @@ class StringLiteral < Node # string literal attr_reader :parts - # [String] which quote was used by the string literal + # [nil | String] which quote was used by the string literal attr_reader :quote # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -10193,7 +10383,7 @@ def format(q) opening_quote, closing_quote = if !Quotes.locked?(self, q.quote) 
[q.quote, q.quote] - elsif quote.start_with?("%") + elsif quote&.start_with?("%") [quote, Quotes.matching(quote[/%[qQ]?(.)/, 1])] else [quote, quote] @@ -10390,8 +10580,8 @@ def ===(other) # :symbol # class SymbolLiteral < Node - # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op] the value of the - # symbol + # [Backtick | Const | CVar | GVar | Ident | IVar | Kw | Op | TStringContent] + # the value of the symbol attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -10430,6 +10620,7 @@ def deconstruct_keys(_keys) def format(q) q.text(":") + q.text("\\") if value.comments.any? q.format(value) end @@ -10899,7 +11090,7 @@ def ===(other) # not value # class Not < Node - # [nil | untyped] the statement on which to operate + # [nil | Node] the statement on which to operate attr_reader :statement # [boolean] whether or not parentheses were used @@ -10986,7 +11177,7 @@ class Unary < Node # [String] the operator being used attr_reader :operator - # [untyped] the statement on which to operate + # [Node] the statement on which to operate attr_reader :statement # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11130,7 +11321,7 @@ def ===(other) # end # class UnlessNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -11276,7 +11467,7 @@ def format_break(q) # end # class UntilNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed @@ -11344,7 +11535,7 @@ def modifier? # # In the example above, the VarField node represents the +variable+ token. 
class VarField < Node - # [nil | Const | CVar | GVar | Ident | IVar] the target of this node + # [nil | :nil | Const | CVar | GVar | Ident | IVar] the target of this node attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11361,7 +11552,7 @@ def accept(visitor) end def child_nodes - [value] + value == :nil ? [] : [value] end def copy(value: nil, location: nil) @@ -11455,8 +11646,9 @@ def ===(other) # # To be clear, this method should just not exist. It's not good. It's a # place of shame. But it's necessary for now, so I'm keeping it. - def pin(parent) - replace = PinnedVarRef.new(value: value, location: location) + def pin(parent, pin) + replace = + PinnedVarRef.new(value: value, location: pin.location.to(location)) parent .deconstruct_keys([]) @@ -11467,6 +11659,10 @@ def pin(parent) elsif value.is_a?(Array) && (index = value.index(self)) parent.public_send(key)[index] = replace break + elsif value.is_a?(Array) && + (index = value.index { |(_k, v)| v == self }) + parent.public_send(key)[index][1] = replace + break end end end @@ -11482,7 +11678,7 @@ def pin(parent) # This can be a plain local variable like the example above. It can also be a # a class variable, a global variable, or an instance variable. class PinnedVarRef < Node - # [VarRef] the value of this node + # [Const | CVar | GVar | Ident | IVar] the value of this node attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -11585,6 +11781,10 @@ def ===(other) def access_control? @access_control ||= %w[private protected public].include?(value.value) end + + def arity + 0 + end end # VoidStmt represents an empty lexical block of code. @@ -11592,9 +11792,6 @@ def access_control? 
# ;; # class VoidStmt < Node - # [Location] the location of this node - attr_reader :location - # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments @@ -11755,7 +11952,7 @@ def ===(other) # end # class WhileNode < Node - # [untyped] the expression to be checked + # [Node] the expression to be checked attr_reader :predicate # [Statements] the expressions to be executed diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 85f6661e..ace077ee 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -53,7 +53,7 @@ def initialize(start, line) # there's a BOM at the beginning of the file, which is the reason we need # to compare it to 0 here. def [](byteindex) - indices[byteindex < 0 ? 0 : byteindex] + indices[[byteindex, 0].max] end end @@ -256,11 +256,37 @@ def find_token(type) tokens[index] if index end + def find_token_between(type, left, right) + bounds = left.location.end_char...right.location.start_char + index = + tokens.rindex do |token| + char = token.location.start_char + break if char < bounds.begin + + token.is_a?(type) && bounds.cover?(char) + end + + tokens[index] if index + end + def find_keyword(name) index = tokens.rindex { |token| token.is_a?(Kw) && (token.name == name) } tokens[index] if index end + def find_keyword_between(name, left, right) + bounds = left.end_char...right.start_char + index = + tokens.rindex do |token| + char = token.location.start_char + break if char < bounds.begin + + token.is_a?(Kw) && (token.name == name) && bounds.cover?(char) + end + + tokens[index] if index + end + def find_operator(name) index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) } tokens[index] if index @@ -348,6 +374,7 @@ def on_BEGIN(statements) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( + self, start_char, start_char - line_counts[lbrace.location.start_line - 1].start, rbrace.location.start_char, @@ -386,6 +413,7 @@ def 
on_END(statements) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( + self, start_char, start_char - line_counts[lbrace.location.start_line - 1].start, rbrace.location.start_char, @@ -640,13 +668,18 @@ def visit(node) stack.pop end - def visit_var_ref(node) - pins.shift - node.pin(stack[-2]) + visit_methods do + def visit_var_ref(node) + if node.start_char > pins.first.start_char + node.pin(stack[-2], pins.shift) + else + super + end + end end def self.visit(node, tokens) - start_char = node.location.start_char + start_char = node.start_char allocated = [] tokens.reverse_each do |token| @@ -670,18 +703,22 @@ def self.visit(node, tokens) # (nil | Array[untyped]) posts # ) -> AryPtn def on_aryptn(constant, requireds, rest, posts) - parts = [constant, *requireds, rest, *posts].compact + lbracket = find_token(LBracket) + lbracket ||= find_token(LParen) if constant - # If there aren't any parts (no constant, no positional arguments), then - # we're matching an empty array. In this case, we're going to look for the - # left and right brackets explicitly. Otherwise, we'll just use the bounds - # of the various parts. - location = - if parts.empty? - consume_token(LBracket).location.to(consume_token(RBracket).location) - else - parts[0].location.to(parts[-1].location) - end + rbracket = find_token(RBracket) + rbracket ||= find_token(RParen) if constant + + parts = [constant, lbracket, *requireds, rest, *posts, rbracket].compact + + # The location is going to be determined by the first part to the last + # part. This includes potential brackets. + location = parts[0].location.to(parts[-1].location) + + # Now that we have the location calculated, we can remove the brackets + # from the list of tokens. 
+ tokens.delete(lbracket) if lbracket + tokens.delete(rbracket) if rbracket # If there is a plain *, then we're going to fix up the location of it # here because it currently doesn't have anything to use for its precise @@ -820,6 +857,7 @@ def on_begin(bodystmt) end bodystmt.bind( + self, find_next_statement_start(keyword.location.end_char), keyword.location.end_column, end_location.end_char, @@ -871,13 +909,34 @@ def on_binary(left, operator, right) # on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar def on_block_var(params, locals) index = - tokens.rindex do |node| - node.is_a?(Op) && %w[| ||].include?(node.value) && - node.location.start_char < params.location.start_char - end + tokens.rindex { |node| node.is_a?(Op) && %w[| ||].include?(node.value) } + + ending = tokens.delete_at(index) + beginning = ending.value == "||" ? ending : consume_operator(:|) + + # If there are no parameters, then we didn't have anything to base the + # location information of off. Now that we have an opening of the + # block, we can correct this. + if params.empty? + start_line = params.location.start_line + start_char = + ( + if beginning.value == "||" + beginning.location.start_char + else + find_next_statement_start(beginning.location.end_char) + end + ) + + location = + Location.fixed( + line: start_line, + char: start_char, + column: start_char - line_counts[start_line - 1].start + ) - beginning = tokens[index] - ending = tokens[-1] + params = params.copy(location: location) + end BlockVar.new( params: params, @@ -905,6 +964,14 @@ def on_blockarg(name) # (nil | Ensure) ensure_clause # ) -> BodyStmt def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) + # In certain versions of Ruby, the `statements` argument can be any node + # in the case that we're inside of an endless method definition. In this + # case we'll wrap it in a Statements node to be consistent. 
+ unless statements.is_a?(Statements) + statements = + Statements.new(body: [statements], location: statements.location) + end + parts = [statements, rescue_clause, else_clause, ensure_clause].compact BodyStmt.new( @@ -929,6 +996,7 @@ def on_brace_block(block_var, statements) start_char = find_next_statement_start(location.end_char) statements.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, rbrace.location.start_char, @@ -995,22 +1063,11 @@ def on_call(receiver, operator, message) # :call-seq: # on_case: (untyped value, untyped consequent) -> Case | RAssign def on_case(value, consequent) - if (keyword = find_keyword(:case)) - tokens.delete(keyword) - - Case.new( - keyword: keyword, - value: value, - consequent: consequent, - location: keyword.location.to(consequent.location) - ) - else - operator = - if (keyword = find_keyword(:in)) - tokens.delete(keyword) - else - consume_operator(:"=>") - end + if value && (operator = find_keyword(:in) || find_operator(:"=>")) && + (value.location.end_char...consequent.location.start_char).cover?( + operator.location.start_char + ) + tokens.delete(operator) node = RAssign.new( @@ -1022,6 +1079,15 @@ def on_case(value, consequent) PinVisitor.visit(node, tokens) node + else + keyword = consume_keyword(:case) + + Case.new( + keyword: keyword, + value: value, + consequent: consequent, + location: keyword.location.to(consequent.location) + ) end end @@ -1038,6 +1104,7 @@ def on_class(constant, superclass, bodystmt) start_char = find_next_statement_start(location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, ending.location.start_char, @@ -1105,6 +1172,7 @@ def on_command_call(receiver, operator, message, arguments) # :call-seq: # on_comment: (String value) -> Comment def on_comment(value) + # char is the index of the # character in the source. 
char = char_pos location = Location.token( @@ -1114,8 +1182,24 @@ def on_comment(value) size: value.size - 1 ) - index = source.rindex(/[^\t ]/, char - 1) if char != 0 - inline = index && (source[index] != "\n") + # Loop backward in the source string, starting from the beginning of the + # comment, and find the first character that is not a space or a tab. If + # index is -1, this indicates that we've checked all of the characters + # back to the start of the source, so this comment must be at the + # beginning of the file. + # + # We are purposefully not using rindex or regular expressions here because + # they check if there are invalid characters, which is actually possible + # with the use of __END__ syntax. + index = char - 1 + while index > -1 && (source[index] == "\t" || source[index] == " ") + index -= 1 + end + + # If we found a character that was not a space or a tab before the comment + # and it's a newline, then this comment is inline. Otherwise, it stands on + # its own and can be attached as its own node in the tree. 
+ inline = index != -1 && source[index] != "\n" comment = Comment.new(value: value.chomp, inline: inline, location: location) @@ -1139,13 +1223,23 @@ def on_const(value) end # :call-seq: - # on_const_path_field: (untyped parent, Const constant) -> ConstPathField + # on_const_path_field: (untyped parent, Const constant) -> + # ConstPathField | Field def on_const_path_field(parent, constant) - ConstPathField.new( - parent: parent, - constant: constant, - location: parent.location.to(constant.location) - ) + if constant.is_a?(Const) + ConstPathField.new( + parent: parent, + constant: constant, + location: parent.location.to(constant.location) + ) + else + Field.new( + parent: parent, + operator: consume_operator(:"::"), + name: constant, + location: parent.location.to(constant.location) + ) + end end # :call-seq: @@ -1220,6 +1314,7 @@ def on_def(name, params, bodystmt) start_char = find_next_statement_start(params.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[params.location.start_line - 1].start, ending.location.start_char, @@ -1308,6 +1403,7 @@ def on_defs(target, operator, name, params, bodystmt) start_char = find_next_statement_start(params.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[params.location.start_line - 1].start, ending.location.start_char, @@ -1347,6 +1443,7 @@ def on_do_block(block_var, bodystmt) start_char = find_next_statement_start(location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[location.start_line - 1].start, ending.location.start_char, @@ -1442,6 +1539,7 @@ def on_else(statements) start_char = find_next_statement_start(keyword.location.end_char) statements.bind( + self, start_char, start_char - line_counts[keyword.location.start_line - 1].start, ending.location.start_char, @@ -1465,8 +1563,16 @@ def on_elsif(predicate, statements, consequent) beginning = consume_keyword(:elsif) ending = consequent || consume_keyword(:end) - start_char = 
find_next_statement_start(predicate.location.end_char) + delimiter = + find_keyword_between(:then, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.start_line - 1].start, ending.location.start_char, @@ -1590,6 +1696,7 @@ def on_ensure(statements) ending = find_keyword(:end) start_char = find_next_statement_start(keyword.location.end_char) statements.bind( + self, start_char, start_char - line_counts[keyword.location.start_line - 1].start, ending.location.start_char, @@ -1629,13 +1736,13 @@ def on_fcall(value) # :call-seq: # on_field: ( # untyped parent, - # (:"::" | Op | Period) operator + # (:"::" | Op | Period | 73) operator # (Const | Ident) name # ) -> Field def on_field(parent, operator, name) Field.new( parent: parent, - operator: operator, + operator: operator == 73 ? :"::" : operator, name: name, location: parent.location.to(name.location) ) @@ -1664,6 +1771,22 @@ def on_float(value) # VarField right # ) -> FndPtn def on_fndptn(constant, left, values, right) + # The left and right of a find pattern are always going to be splats, so + # we're going to consume the * operators and use their location + # information to extend the location of the splats. + right, left = + [right, left].map do |node| + operator = consume_operator(:*) + location = + if node.value + operator.location.to(node.location) + else + operator.location + end + + node.copy(location: location) + end + # The opening of this find pattern is either going to be a left bracket, a # right left parenthesis, or the left splat. 
We're going to use this to # determine how to find the closing of the pattern, as well as determining @@ -1704,21 +1827,20 @@ def on_for(index, collection, statements) in_keyword = consume_keyword(:in) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && - keyword.location.start_char > collection.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, collection, ending) || + find_token_between(Semicolon, collection, ending) + + tokens.delete(delimiter) if delimiter start_char = - find_next_statement_start((keyword || collection).location.end_char) + find_next_statement_start((delimiter || collection).location.end_char) + statements.bind( + self, start_char, start_char - - line_counts[(keyword || collection).location.end_line - 1].start, + line_counts[(delimiter || collection).location.end_line - 1].start, ending.location.start_char, ending.location.start_column ) @@ -1772,7 +1894,7 @@ def on_heredoc_beg(value) line: lineno, char: char_pos, column: current_column, - size: value.size + 1 + size: value.size ) # Here we're going to artificially create an extra node type so that if @@ -1807,7 +1929,7 @@ def on_heredoc_end(value) line: lineno, char: char_pos, column: current_column, - size: value.size + 1 + size: value.size ) heredoc_end = HeredocEnd.new(value: value.chomp, location: location) @@ -1822,9 +1944,9 @@ def on_heredoc_end(value) start_line: heredoc.location.start_line, start_char: heredoc.location.start_char, start_column: heredoc.location.start_column, - end_line: lineno, - end_char: char_pos, - end_column: current_column + end_line: location.end_line, + end_char: location.end_char, + end_column: location.end_column ) ) end @@ -1832,10 +1954,42 @@ def on_heredoc_end(value) # :call-seq: # on_hshptn: ( # (nil | untyped) constant, - # Array[[Label, 
untyped]] keywords, + # Array[[Label | StringContent, untyped]] keywords, # (nil | VarField) keyword_rest # ) -> HshPtn def on_hshptn(constant, keywords, keyword_rest) + keywords = + (keywords || []).map do |(label, value)| + if label.is_a?(Label) + [label, value] + else + tstring_beg_index = + tokens.rindex do |token| + token.is_a?(TStringBeg) && + token.location.start_char < label.location.start_char + end + + tstring_beg = tokens.delete_at(tstring_beg_index) + + label_end_index = + tokens.rindex do |token| + token.is_a?(LabelEnd) && + token.location.start_char == label.location.end_char + end + + label_end = tokens.delete_at(label_end_index) + + [ + DynaSymbol.new( + parts: label.parts, + quote: label_end.value[0], + location: tstring_beg.location.to(label_end.location) + ), + value + ] + end + end + if keyword_rest # We're doing this to delete the token from the list so that it doesn't # confuse future patterns by thinking they have an extra ** on the end. @@ -1848,7 +2002,7 @@ def on_hshptn(constant, keywords, keyword_rest) keyword_rest = VarField.new(value: nil, location: token.location) end - parts = [constant, *keywords&.flatten(1), keyword_rest].compact + parts = [constant, *keywords.flatten(1), keyword_rest].compact # If there's no constant, there may be braces, so we're going to look for # those to get our bounds. 
@@ -1865,7 +2019,7 @@ def on_hshptn(constant, keywords, keyword_rest) HshPtn.new( constant: constant, - keywords: keywords || [], + keywords: keywords, keyword_rest: keyword_rest, location: parts[0].location.to(parts[-1].location) ) @@ -1896,8 +2050,15 @@ def on_if(predicate, statements, consequent) beginning = consume_keyword(:if) ending = consequent || consume_keyword(:end) - start_char = find_next_statement_start(predicate.location.end_char) + if (keyword = find_keyword_between(:then, predicate, ending)) + tokens.delete(keyword) + end + + start_char = + find_next_statement_start((keyword || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -1931,7 +2092,7 @@ def on_if_mod(predicate, statement) IfNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), consequent: nil, location: statement.location.to(predicate.location) ) @@ -1975,13 +2136,22 @@ def on_in(pattern, statements, consequent) ending = consequent || consume_keyword(:end) statements_start = pattern - if (token = find_keyword(:then)) + if (token = find_keyword_between(:then, pattern, statements)) tokens.delete(token) statements_start = token end - start_char = find_next_statement_start(statements_start.location.end_char) + start_char = + find_next_statement_start((token || statements_start).location.end_char) + + # Ripper ignores parentheses on patterns, so we need to do the same in + # order to attach comments correctly to the pattern. 
+ if source[start_char] == ")" + start_char = find_next_statement_start(start_char + 1) + end + statements.bind( + self, start_char, start_char - line_counts[statements_start.location.start_line - 1].start, @@ -2106,12 +2276,19 @@ def on_lambda(params, statements) token.location.start_char > beginning.location.start_char end + if braces + opening = consume_token(TLamBeg) + closing = consume_token(RBrace) + else + opening = consume_keyword(:do) + closing = consume_keyword(:end) + end + # We need to do some special mapping here. Since ripper doesn't support - # capturing lambda var until 3.2, we need to normalize all of that here. + # capturing lambda vars, we need to normalize all of that here. params = - case params - when Paren - # In this case we've gotten to the <3.2 parentheses wrapping a set of + if params.is_a?(Paren) + # In this case we've gotten to the parentheses wrapping a set of # parameters case. Here we need to manually scan for lambda locals. range = (params.location.start_char + 1)...params.location.end_char locals = lambda_locals(source[range]) @@ -2133,27 +2310,31 @@ def on_lambda(params, statements) node.comments.concat(params.comments) node - when Params - # In this case we've gotten to the <3.2 plain set of parameters. In - # this case there cannot be lambda locals, so we will wrap the - # parameters into a lambda var that has no locals. + else + # If there are no parameters, then we didn't have anything to base the + # location information of off. Now that we have an opening of the + # block, we can correct this. + if params.empty? + opening_location = opening.location + location = + Location.fixed( + line: opening_location.start_line, + char: opening_location.start_char, + column: opening_location.start_column + ) + + params = params.copy(location: location) + end + + # In this case we've gotten to the plain set of parameters. 
In this + # case there cannot be lambda locals, so we will wrap the parameters + # into a lambda var that has no locals. LambdaVar.new(params: params, locals: [], location: params.location) - when LambdaVar - # In this case we've gotten to 3.2+ lambda var. In this case we don't - # need to do anything and can just the value as given. - params end - if braces - opening = consume_token(TLamBeg) - closing = consume_token(RBrace) - else - opening = consume_keyword(:do) - closing = consume_keyword(:end) - end - start_char = find_next_statement_start(opening.location.end_char) statements.bind( + self, start_char, start_char - line_counts[opening.location.end_line - 1].start, closing.location.start_char, @@ -2221,8 +2402,14 @@ def lambda_locals(source) } } + parent_line = lineno - 1 + parent_column = + consume_token(Semicolon).location.start_column - tokens[index][0][1] + tokens[(index + 1)..].each_with_object([]) do |token, locals| (lineno, column), type, value, = token + column += parent_column if lineno == 1 + lineno += parent_line # Make the state transition for the parser. 
If there isn't a transition # from the current state to a new state for this type, then we're in a @@ -2338,23 +2525,30 @@ def on_method_add_arg(call, arguments) # :call-seq: # on_method_add_block: ( - # (Call | Command | CommandCall) call, + # (Break | Call | Command | CommandCall, Next) call, # Block block - # ) -> MethodAddBlock + # ) -> Break | MethodAddBlock def on_method_add_block(call, block) location = call.location.to(block.location) case call + when Break, Next, ReturnNode + parts = call.arguments.parts + + node = parts.pop + copied = + node.copy(block: block, location: node.location.to(block.location)) + + copied.comments.concat(call.comments) + parts << copied + + call.copy(location: location) when Command, CommandCall node = call.copy(block: block, location: location) node.comments.concat(call.comments) node else - MethodAddBlock.new( - call: call, - block: block, - location: call.location.to(block.location) - ) + MethodAddBlock.new(call: call, block: block, location: location) end end @@ -2431,6 +2625,7 @@ def on_module(constant, bodystmt) start_char = find_next_statement_start(constant.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[constant.location.start_line - 1].start, ending.location.start_char, @@ -2577,19 +2772,40 @@ def on_params( # have a `nil` for the value instead of a `false`. keywords&.map! { |(key, value)| [key, value || nil] } - parts = [ - *requireds, - *optionals&.flatten(1), - rest, - *posts, - *keywords&.flatten(1), - (keyword_rest if keyword_rest != :nil), - (block if block != :&) - ].compact + # Here we're going to build up a list of all of the params so that we can + # determine our location information. 
+ parts = [] + + requireds&.each { |required| parts << required.location } + optionals&.each do |(key, value)| + parts << key.location + parts << value.location if value + end + + parts << rest.location if rest + posts&.each { |post| parts << post.location } + + keywords&.each do |(key, value)| + parts << key.location + parts << value.location if value + end + + if keyword_rest == :nil + # When we get a :nil here, it means that we have **nil syntax, which + # means this set of parameters accepts no more keyword arguments. In + # this case we need to go and find the location of these two tokens. + operator = consume_operator(:**) + parts << operator.location.to(consume_keyword(:nil).location) + elsif keyword_rest + parts << keyword_rest.location + end + + parts << block.location if block && block != :& + parts = parts.compact location = if parts.any? - parts[0].location.to(parts[-1].location) + parts[0].to(parts[-1]) else Location.fixed(line: lineno, char: char_pos, column: current_column) end @@ -2655,6 +2871,7 @@ def on_parse_error(error, *) alias on_assign_error on_parse_error alias on_class_name_error on_parse_error alias on_param_error on_parse_error + alias compile_error on_parse_error # :call-seq: # on_period: (String value) -> Period @@ -2686,7 +2903,7 @@ def on_program(statements) ) statements.body << @__end__ if @__end__ - statements.bind(0, 0, source.length, last_column) + statements.bind(self, 0, 0, source.length, last_column) program = Program.new(statements: statements, location: location) attach_comments(program, @comments) @@ -3018,8 +3235,9 @@ def on_rescue(exceptions, variable, statements, consequent) exceptions = exceptions[0] if exceptions.is_a?(Array) last_node = variable || exceptions || keyword - start_char = find_next_statement_start(last_node.location.end_char) + start_char = find_next_statement_start(last_node.end_char) statements.bind( + self, start_char, start_char - line_counts[last_node.location.start_line - 1].start, char_pos, @@ 
-3040,7 +3258,7 @@ def on_rescue(exceptions, variable, statements, consequent) start_char: keyword.location.end_char + 1, start_column: keyword.location.end_column + 1, end_line: last_node.location.end_line, - end_char: last_node.location.end_char, + end_char: last_node.end_char, end_column: last_node.location.end_column ) ) @@ -3138,6 +3356,7 @@ def on_sclass(target, bodystmt) start_char = find_next_statement_start(target.location.end_char) bodystmt.bind( + self, start_char, start_char - line_counts[target.location.start_line - 1].start, ending.location.start_char, @@ -3151,9 +3370,29 @@ def on_sclass(target, bodystmt) ) end - # def on_semicolon(value) - # value - # end + # Semicolons are tokens that get added to the token list but never get + # attached to the AST. Because of this they only need to track their + # associated location so they can be used for computing bounds. + class Semicolon + attr_reader :location + + def initialize(location) + @location = location + end + end + + # :call-seq: + # on_semicolon: (String value) -> Semicolon + def on_semicolon(value) + tokens << Semicolon.new( + Location.token( + line: lineno, + char: char_pos, + column: current_column, + size: value.size + ) + ) + end # def on_sp(value) # value @@ -3171,18 +3410,13 @@ def on_stmts_add(statements, statement) statements.location.to(statement.location) end - Statements.new( - self, - body: statements.body << statement, - location: location - ) + Statements.new(body: statements.body << statement, location: location) end # :call-seq: # on_stmts_new: () -> Statements def on_stmts_new Statements.new( - self, body: [], location: Location.fixed(line: lineno, char: char_pos, column: current_column) @@ -3247,6 +3481,7 @@ def on_string_embexpr(statements) embexpr_end = consume_token(EmbExprEnd) statements.bind( + self, embexpr_beg.location.end_char, embexpr_beg.location.end_column, embexpr_end.location.start_char, @@ -3590,8 +3825,15 @@ def on_unless(predicate, statements, consequent) 
beginning = consume_keyword(:unless) ending = consequent || consume_keyword(:end) - start_char = find_next_statement_start(predicate.location.end_char) + if (keyword = find_keyword_between(:then, predicate, ending)) + tokens.delete(keyword) + end + + start_char = + find_next_statement_start((keyword || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3614,7 +3856,7 @@ def on_unless_mod(predicate, statement) UnlessNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), consequent: nil, location: statement.location.to(predicate.location) ) @@ -3626,17 +3868,18 @@ def on_until(predicate, statements) beginning = consume_keyword(:until) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && keyword.location.start_char > predicate.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter # Update the Statements location information - start_char = find_next_statement_start(predicate.location.end_char) + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3658,7 +3901,7 @@ def on_until_mod(predicate, statement) UntilNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), location: 
statement.location.to(predicate.location) ) end @@ -3729,9 +3972,11 @@ def on_when(arguments, statements, consequent) statements_start = token end - start_char = find_next_statement_start(statements_start.location.end_char) + start_char = + find_next_statement_start((token || statements_start).location.end_char) statements.bind( + self, start_char, start_char - line_counts[statements_start.location.start_line - 1].start, @@ -3753,17 +3998,18 @@ def on_while(predicate, statements) beginning = consume_keyword(:while) ending = consume_keyword(:end) - # Consume the do keyword if it exists so that it doesn't get confused for - # some other block - keyword = find_keyword(:do) - if keyword && keyword.location.start_char > predicate.location.end_char && - keyword.location.end_char < ending.location.start_char - tokens.delete(keyword) - end + delimiter = + find_keyword_between(:do, predicate, statements) || + find_token_between(Semicolon, predicate, statements) + + tokens.delete(delimiter) if delimiter # Update the Statements location information - start_char = find_next_statement_start(predicate.location.end_char) + start_char = + find_next_statement_start((delimiter || predicate).location.end_char) + statements.bind( + self, start_char, start_char - line_counts[predicate.location.end_line - 1].start, ending.location.start_char, @@ -3785,7 +4031,7 @@ def on_while_mod(predicate, statement) WhileNode.new( predicate: predicate, statements: - Statements.new(self, body: [statement], location: statement.location), + Statements.new(body: [statement], location: statement.location), location: statement.location.to(predicate.location) ) end diff --git a/lib/syntax_tree/pattern.rb b/lib/syntax_tree/pattern.rb index ca49c6bf..a5e88bfa 100644 --- a/lib/syntax_tree/pattern.rb +++ b/lib/syntax_tree/pattern.rb @@ -70,6 +70,7 @@ def compile raise CompilationError, query end + raise CompilationError, query if program.nil? 
compile_node(program.statements.body.first.consequent.pattern) end diff --git a/lib/syntax_tree/plugin/disable_auto_ternary.rb b/lib/syntax_tree/plugin/disable_auto_ternary.rb new file mode 100644 index 00000000..dd38c783 --- /dev/null +++ b/lib/syntax_tree/plugin/disable_auto_ternary.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +module SyntaxTree + class Formatter + DISABLE_AUTO_TERNARY = true + end +end diff --git a/lib/syntax_tree/pretty_print_visitor.rb b/lib/syntax_tree/pretty_print_visitor.rb new file mode 100644 index 00000000..894e0cf4 --- /dev/null +++ b/lib/syntax_tree/pretty_print_visitor.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module SyntaxTree + # This visitor pretty-prints the AST into an equivalent s-expression. + class PrettyPrintVisitor < FieldVisitor + attr_reader :q + + def initialize(q) + @q = q + end + + # This is here because we need to make sure the operator is cast to a string + # before we print it out. + def visit_binary(node) + node(node, "binary") do + field("left", node.left) + text("operator", node.operator.to_s) + field("right", node.right) + comments(node) + end + end + + # This is here to make it a little nicer to look at labels since they + # typically have their : at the end of the value. + def visit_label(node) + node(node, "label") do + q.breakable + q.text(":") + q.text(node.value[0...-1]) + comments(node) + end + end + + private + + def comments(node) + return if node.comments.empty? 
+ + q.breakable + q.group(2, "(", ")") do + q.seplist(node.comments) { |comment| q.pp(comment) } + end + end + + def field(_name, value) + q.breakable + q.pp(value) + end + + def list(_name, values) + q.breakable + q.group(2, "(", ")") { q.seplist(values) { |value| q.pp(value) } } + end + + def node(_node, type) + q.group(2, "(", ")") do + q.text(type) + yield + end + end + + def pairs(_name, values) + q.group(2, "(", ")") do + q.seplist(values) do |(key, value)| + q.pp(key) + + if value + q.text("=") + q.group(2) do + q.breakable("") + q.pp(value) + end + end + end + end + end + + def text(_name, value) + q.breakable + q.text(value) + end + end +end diff --git a/lib/syntax_tree/reflection.rb b/lib/syntax_tree/reflection.rb new file mode 100644 index 00000000..6955aa21 --- /dev/null +++ b/lib/syntax_tree/reflection.rb @@ -0,0 +1,257 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module is used to provide some reflection on the various types of nodes + # and their attributes. As soon as it is required it collects all of its + # information. + module Reflection + # This module represents the type of the values being passed to attributes + # of nodes. It is used as part of the documentation of the attributes. + module Type + CONSTANTS = SyntaxTree.constants.to_h { [_1, SyntaxTree.const_get(_1)] } + + # Represents an array type that holds another type. + class ArrayType + attr_reader :type + + def initialize(type) + @type = type + end + + def ===(value) + value.is_a?(Array) && value.all? { type === _1 } + end + + def inspect + "Array<#{type.inspect}>" + end + end + + # Represents a tuple type that holds a number of types in order. + class TupleType + attr_reader :types + + def initialize(types) + @types = types + end + + def ===(value) + value.is_a?(Array) && value.length == types.length && + value.zip(types).all? 
{ |item, type| type === item } + end + + def inspect + "[#{types.map(&:inspect).join(", ")}]" + end + end + + # Represents a union type that can be one of a number of types. + class UnionType + attr_reader :types + + def initialize(types) + @types = types + end + + def ===(value) + types.any? { _1 === value } + end + + def inspect + types.map(&:inspect).join(" | ") + end + end + + class << self + def parse(comment) + comment = comment.gsub("\n", " ") + + unless comment.start_with?("[") + raise "Comment does not start with a bracket: #{comment.inspect}" + end + + count = 1 + found = + comment.chars[1..] + .find + .with_index(1) do |char, index| + count += { "[" => 1, "]" => -1 }.fetch(char, 0) + break index if count == 0 + end + + # If we weren't able to find the end of the balanced brackets, then + # the comment is malformed. + if found.nil? + raise "Comment does not have balanced brackets: #{comment.inspect}" + end + + parse_type(comment[1...found].strip) + end + + private + + def parse_type(value) + case value + when "Integer" + Integer + when "String" + String + when "Symbol" + Symbol + when "boolean" + UnionType.new([TrueClass, FalseClass]) + when "nil" + NilClass + when ":\"::\"" + :"::" + when ":call" + :call + when ":nil" + :nil + when /\AArray\[(.+)\]\z/ + ArrayType.new(parse_type($1.strip)) + when /\A\[(.+)\]\z/ + TupleType.new($1.strip.split(/\s*,\s*/).map { parse_type(_1) }) + else + if value.include?("|") + UnionType.new(value.split(/\s*\|\s*/).map { parse_type(_1) }) + else + CONSTANTS.fetch(value.to_sym) + end + end + end + end + end + + # This class represents one of the attributes on a node in the tree. + class Attribute + attr_reader :name, :comment, :type + + def initialize(name, comment) + @name = name + @comment = comment + @type = Type.parse(comment) + end + end + + # This class represents one of our nodes in the tree. We're going to use it + # as a placeholder for collecting all of the various places that nodes are + # used. 
+ class Node + attr_reader :name, :comment, :attributes, :visitor_method + + def initialize(name, comment, attributes, visitor_method) + @name = name + @comment = comment + @attributes = attributes + @visitor_method = visitor_method + end + end + + class << self + # This is going to hold a hash of all of the nodes in the tree. The keys + # are the names of the nodes as symbols. + attr_reader :nodes + + # This expects a node name as a symbol and returns the node object for + # that node. + def node(name) + nodes.fetch(name) + end + + private + + def parse_comments(statements, index) + statements[0...index] + .reverse_each + .take_while { _1.is_a?(SyntaxTree::Comment) } + .reverse_each + .map { _1.value[2..] } + end + end + + @nodes = {} + + # For each node, we're going to parse out its attributes and other metadata. + # We'll use this as the basis for our report. + program = + SyntaxTree.parse(SyntaxTree.read(File.expand_path("node.rb", __dir__))) + + program_statements = program.statements + main_statements = program_statements.body.last.bodystmt.statements.body + main_statements.each_with_index do |main_statement, main_statement_index| + # Ensure we are only looking at class declarations. + next unless main_statement.is_a?(SyntaxTree::ClassDeclaration) + + # Ensure we're looking at class declarations with superclasses. + superclass = main_statement.superclass + next unless superclass.is_a?(SyntaxTree::VarRef) + + # Ensure we're looking at class declarations that inherit from Node. + next unless superclass.value.value == "Node" + + # All child nodes inherit the location attr_reader from Node, so we'll add + # that to the list of attributes first. + attributes = { + location: + Attribute.new(:location, "[Location] the location of this node") + } + + # This is the name of the method tha gets called on the given visitor when + # the accept method is called on this node. 
+ visitor_method = nil + + statements = main_statement.bodystmt.statements.body + statements.each_with_index do |statement, statement_index| + case statement + when SyntaxTree::Command + # We only use commands in node classes to define attributes. So, we + # can safely assume that we're looking at an attribute definition. + unless %w[attr_reader attr_accessor].include?(statement.message.value) + raise "Unexpected command: #{statement.message.value.inspect}" + end + + # The arguments to the command are the attributes that we're defining. + # We want to ensure that we're only defining one at a time. + if statement.arguments.parts.length != 1 + raise "Declaring more than one attribute at a time is not permitted" + end + + attribute = + Attribute.new( + statement.arguments.parts.first.value.value.to_sym, + "#{parse_comments(statements, statement_index).join("\n")}\n" + ) + + # Ensure that we don't already have an attribute named the same as + # this one, and then add it to the list of attributes. + if attributes.key?(attribute.name) + raise "Duplicate attribute: #{attribute.name}" + end + + attributes[attribute.name] = attribute + when SyntaxTree::DefNode + if statement.name.value == "accept" + call_node = statement.bodystmt.statements.body.first + visitor_method = call_node.message.value.to_sym + end + end + end + + # If we never found a visitor method, then we have an error. + raise if visitor_method.nil? + + # Finally, set it up in the hash of nodes so that we can use it later. 
+ comments = parse_comments(main_statements, main_statement_index) + node = + Node.new( + main_statement.constant.constant.value.to_sym, + "#{comments.join("\n")}\n", + attributes, + visitor_method + ) + + @nodes[node.name] = node + end + end +end diff --git a/lib/syntax_tree/version.rb b/lib/syntax_tree/version.rb index 340bbbdf..9e80fa7b 100644 --- a/lib/syntax_tree/version.rb +++ b/lib/syntax_tree/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxTree - VERSION = "5.0.1" + VERSION = "6.3.0" end diff --git a/lib/syntax_tree/visitor/environment.rb b/lib/syntax_tree/visitor/environment.rb deleted file mode 100644 index b07a5203..00000000 --- a/lib/syntax_tree/visitor/environment.rb +++ /dev/null @@ -1,84 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # The environment class is used to keep track of local variables and arguments - # inside a particular scope - class Environment - # This class tracks the occurrences of a local variable or argument - class Local - # [Symbol] The type of the local (e.g. 
:argument, :variable) - attr_reader :type - - # [Array[Location]] The locations of all definitions and assignments of - # this local - attr_reader :definitions - - # [Array[Location]] The locations of all usages of this local - attr_reader :usages - - # initialize: (Symbol type) -> void - def initialize(type) - @type = type - @definitions = [] - @usages = [] - end - - # add_definition: (Location location) -> void - def add_definition(location) - @definitions << location - end - - # add_usage: (Location location) -> void - def add_usage(location) - @usages << location - end - end - - # [Array[Local]] The local variables and arguments defined in this - # environment - attr_reader :locals - - # [Environment | nil] The parent environment - attr_reader :parent - - # initialize: (Environment | nil parent) -> void - def initialize(parent = nil) - @locals = {} - @parent = parent - end - - # Adding a local definition will either insert a new entry in the locals - # hash or append a new definition location to an existing local. Notice that - # it's not possible to change the type of a local after it has been - # registered - # add_local_definition: (Ident | Label identifier, Symbol type) -> void - def add_local_definition(identifier, type) - name = identifier.value.delete_suffix(":") - - @locals[name] ||= Local.new(type) - @locals[name].add_definition(identifier.location) - end - - # Adding a local usage will either insert a new entry in the locals - # hash or append a new usage location to an existing local. 
Notice that - # it's not possible to change the type of a local after it has been - # registered - # add_local_usage: (Ident | Label identifier, Symbol type) -> void - def add_local_usage(identifier, type) - name = identifier.value.delete_suffix(":") - - @locals[name] ||= Local.new(type) - @locals[name].add_usage(identifier.location) - end - - # Try to find the local given its name in this environment or any of its - # parents - # find_local: (String name) -> Local | nil - def find_local(name) - local = @locals[name] - return local unless local.nil? - - @parent&.find_local(name) - end - end -end diff --git a/lib/syntax_tree/visitor/json_visitor.rb b/lib/syntax_tree/visitor/json_visitor.rb deleted file mode 100644 index b516980c..00000000 --- a/lib/syntax_tree/visitor/json_visitor.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor transforms the AST into a hash that contains only primitives - # that can be easily serialized into JSON. - class JSONVisitor < FieldVisitor - attr_reader :target - - def initialize - @target = nil - end - - private - - def comments(node) - target[:comments] = visit_all(node.comments) - end - - def field(name, value) - target[name] = value.is_a?(Node) ? 
visit(value) : value - end - - def list(name, values) - target[name] = visit_all(values) - end - - def node(node, type) - previous = @target - @target = { type: type, location: visit_location(node.location) } - yield - @target - ensure - @target = previous - end - - def pairs(name, values) - target[name] = values.map { |(key, value)| [visit(key), visit(value)] } - end - - def text(name, value) - target[name] = value - end - - def visit_location(location) - [ - location.start_line, - location.start_char, - location.end_line, - location.end_char - ] - end - end - end -end diff --git a/lib/syntax_tree/visitor/match_visitor.rb b/lib/syntax_tree/visitor/match_visitor.rb deleted file mode 100644 index e0bdaf08..00000000 --- a/lib/syntax_tree/visitor/match_visitor.rb +++ /dev/null @@ -1,122 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor transforms the AST into a Ruby pattern matching expression - # that would match correctly against the AST. - class MatchVisitor < FieldVisitor - attr_reader :q - - def initialize(q) - @q = q - end - - def visit(node) - case node - when Node - super - when String - # pp will split up a string on newlines and concat them together using - # a "+" operator. This breaks the pattern matching expression. So - # instead we're going to check here for strings and manually put the - # entire value into the output buffer. - q.text(node.inspect) - else - node.pretty_print(q) - end - end - - private - - def comments(node) - return if node.comments.empty? 
- - q.nest(0) do - q.text("comments: [") - q.indent do - q.breakable("") - q.seplist(node.comments) { |comment| visit(comment) } - end - q.breakable("") - q.text("]") - end - end - - def field(name, value) - q.nest(0) do - q.text(name) - q.text(": ") - visit(value) - end - end - - def list(name, values) - q.group do - q.text(name) - q.text(": [") - q.indent do - q.breakable("") - q.seplist(values) { |value| visit(value) } - end - q.breakable("") - q.text("]") - end - end - - def node(node, _type) - items = [] - q.with_target(items) { yield } - - if items.empty? - q.text(node.class.name) - return - end - - q.group do - q.text(node.class.name) - q.text("[") - q.indent do - q.breakable("") - q.seplist(items) { |item| q.target << item } - end - q.breakable("") - q.text("]") - end - end - - def pairs(name, values) - q.group do - q.text(name) - q.text(": [") - q.indent do - q.breakable("") - q.seplist(values) do |(key, value)| - q.group do - q.text("[") - q.indent do - q.breakable("") - visit(key) - q.text(",") - q.breakable - visit(value || nil) - end - q.breakable("") - q.text("]") - end - end - end - q.breakable("") - q.text("]") - end - end - - def text(name, value) - q.nest(0) do - q.text(name) - q.text(": ") - value.pretty_print(q) - end - end - end - end -end diff --git a/lib/syntax_tree/visitor/pretty_print_visitor.rb b/lib/syntax_tree/visitor/pretty_print_visitor.rb deleted file mode 100644 index 674e3aac..00000000 --- a/lib/syntax_tree/visitor/pretty_print_visitor.rb +++ /dev/null @@ -1,85 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This visitor pretty-prints the AST into an equivalent s-expression. - class PrettyPrintVisitor < FieldVisitor - attr_reader :q - - def initialize(q) - @q = q - end - - # This is here because we need to make sure the operator is cast to a - # string before we print it out. 
- def visit_binary(node) - node(node, "binary") do - field("left", node.left) - text("operator", node.operator.to_s) - field("right", node.right) - comments(node) - end - end - - # This is here to make it a little nicer to look at labels since they - # typically have their : at the end of the value. - def visit_label(node) - node(node, "label") do - q.breakable - q.text(":") - q.text(node.value[0...-1]) - comments(node) - end - end - - private - - def comments(node) - return if node.comments.empty? - - q.breakable - q.group(2, "(", ")") do - q.seplist(node.comments) { |comment| q.pp(comment) } - end - end - - def field(_name, value) - q.breakable - q.pp(value) - end - - def list(_name, values) - q.breakable - q.group(2, "(", ")") { q.seplist(values) { |value| q.pp(value) } } - end - - def node(_node, type) - q.group(2, "(", ")") do - q.text(type) - yield - end - end - - def pairs(_name, values) - q.group(2, "(", ")") do - q.seplist(values) do |(key, value)| - q.pp(key) - - if value - q.text("=") - q.group(2) do - q.breakable("") - q.pp(value) - end - end - end - end - end - - def text(_name, value) - q.breakable - q.text(value) - end - end - end -end diff --git a/lib/syntax_tree/visitor/with_environment.rb b/lib/syntax_tree/visitor/with_environment.rb deleted file mode 100644 index 59033d50..00000000 --- a/lib/syntax_tree/visitor/with_environment.rb +++ /dev/null @@ -1,140 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # WithEnvironment is a module intended to be included in classes inheriting - # from Visitor. The module overrides a few visit methods to automatically keep - # track of local variables and arguments defined in the current environment. 
- # Example usage: - # class MyVisitor < Visitor - # include WithEnvironment - # - # def visit_ident(node) - # # Check if we're visiting an identifier for an argument, a local - # variable or something else - # local = current_environment.find_local(node) - # - # if local.type == :argument - # # handle identifiers for arguments - # elsif local.type == :variable - # # handle identifiers for variables - # else - # # handle other identifiers, such as method names - # end - # end - module WithEnvironment - def current_environment - @current_environment ||= Environment.new - end - - def with_new_environment - previous_environment = @current_environment - @current_environment = Environment.new(previous_environment) - yield - ensure - @current_environment = previous_environment - end - - # Visits for nodes that create new environments, such as classes, modules - # and method definitions - def visit_class(node) - with_new_environment { super } - end - - def visit_module(node) - with_new_environment { super } - end - - # When we find a method invocation with a block, only the code that happens - # inside of the block needs a fresh environment. 
The method invocation - # itself happens in the same environment - def visit_method_add_block(node) - visit(node.call) - with_new_environment { visit(node.block) } - end - - def visit_def(node) - with_new_environment { super } - end - - # Visit for keeping track of local arguments, such as method and block - # arguments - def visit_params(node) - add_argument_definitions(node.requireds) - - node.posts.each do |param| - current_environment.add_local_definition(param, :argument) - end - - node.keywords.each do |param| - current_environment.add_local_definition(param.first, :argument) - end - - node.optionals.each do |param| - current_environment.add_local_definition(param.first, :argument) - end - - super - end - - def visit_rest_param(node) - name = node.name - current_environment.add_local_definition(name, :argument) if name - - super - end - - def visit_kwrest_param(node) - name = node.name - current_environment.add_local_definition(name, :argument) if name - - super - end - - def visit_blockarg(node) - name = node.name - current_environment.add_local_definition(name, :argument) if name - - super - end - - # Visit for keeping track of local variable definitions - def visit_var_field(node) - value = node.value - - if value.is_a?(SyntaxTree::Ident) - current_environment.add_local_definition(value, :variable) - end - - super - end - - alias visit_pinned_var_ref visit_var_field - - # Visits for keeping track of variable and argument usages - def visit_var_ref(node) - value = node.value - - if value.is_a?(SyntaxTree::Ident) - definition = current_environment.find_local(value.value) - - if definition - current_environment.add_local_usage(value, definition.type) - end - end - - super - end - - private - - def add_argument_definitions(list) - list.each do |param| - if param.is_a?(SyntaxTree::MLHSParen) - add_argument_definitions(param.contents.parts) - else - current_environment.add_local_definition(param, :argument) - end - end - end - end -end diff --git 
a/lib/syntax_tree/with_scope.rb b/lib/syntax_tree/with_scope.rb new file mode 100644 index 00000000..8c4908f3 --- /dev/null +++ b/lib/syntax_tree/with_scope.rb @@ -0,0 +1,311 @@ +# frozen_string_literal: true + +module SyntaxTree + # WithScope is a module intended to be included in classes inheriting from + # Visitor. The module overrides a few visit methods to automatically keep + # track of local variables and arguments defined in the current scope. + # Example usage: + # + # class MyVisitor < Visitor + # include WithScope + # + # def visit_ident(node) + # # Check if we're visiting an identifier for an argument, a local + # # variable or something else + # local = current_scope.find_local(node) + # + # if local.type == :argument + # # handle identifiers for arguments + # elsif local.type == :variable + # # handle identifiers for variables + # else + # # handle other identifiers, such as method names + # end + # end + # end + # + module WithScope + # The scope class is used to keep track of local variables and arguments + # inside a particular scope. + class Scope + # This class tracks the occurrences of a local variable or argument. + class Local + # [Symbol] The type of the local (e.g. 
:argument, :variable) + attr_reader :type + + # [Array[Location]] The locations of all definitions and assignments of + # this local + attr_reader :definitions + + # [Array[Location]] The locations of all usages of this local + attr_reader :usages + + def initialize(type) + @type = type + @definitions = [] + @usages = [] + end + + def add_definition(location) + @definitions << location + end + + def add_usage(location) + @usages << location + end + end + + # [Integer] a unique identifier for this scope + attr_reader :id + + # [scope | nil] The parent scope + attr_reader :parent + + # [Hash[String, Local]] The local variables and arguments defined in this + # scope + attr_reader :locals + + def initialize(id, parent = nil) + @id = id + @parent = parent + @locals = {} + end + + # Adding a local definition will either insert a new entry in the locals + # hash or append a new definition location to an existing local. Notice + # that it's not possible to change the type of a local after it has been + # registered. + def add_local_definition(identifier, type) + name = identifier.value.delete_suffix(":") + + local = + if type == :argument + locals[name] ||= Local.new(type) + else + resolve_local(name, type) + end + + local.add_definition(identifier.location) + end + + # Adding a local usage will either insert a new entry in the locals + # hash or append a new usage location to an existing local. Notice that + # it's not possible to change the type of a local after it has been + # registered. + def add_local_usage(identifier, type) + name = identifier.value.delete_suffix(":") + resolve_local(name, type).add_usage(identifier.location) + end + + # Try to find the local given its name in this scope or any of its + # parents. 
+ def find_local(name) + locals[name] || parent&.find_local(name) + end + + private + + def resolve_local(name, type) + local = find_local(name) + + unless local + local = Local.new(type) + locals[name] = local + end + + local + end + end + + attr_reader :current_scope + + def initialize(*args, **kwargs, &block) + super + + @current_scope = Scope.new(0) + @next_scope_id = 0 + end + + # Visits for nodes that create new scopes, such as classes, modules + # and method definitions. + def visit_class(node) + with_scope { super } + end + + def visit_module(node) + with_scope { super } + end + + # When we find a method invocation with a block, only the code that happens + # inside of the block needs a fresh scope. The method invocation + # itself happens in the same scope. + def visit_method_add_block(node) + visit(node.call) + with_scope(current_scope) { visit(node.block) } + end + + def visit_def(node) + with_scope { super } + end + + # Visit for keeping track of local arguments, such as method and block + # arguments. 
+ def visit_params(node) + add_argument_definitions(node.requireds) + add_argument_definitions(node.posts) + + node.keywords.each do |param| + current_scope.add_local_definition(param.first, :argument) + end + + node.optionals.each do |param| + current_scope.add_local_definition(param.first, :argument) + end + + super + end + + def visit_rest_param(node) + name = node.name + current_scope.add_local_definition(name, :argument) if name + + super + end + + def visit_kwrest_param(node) + name = node.name + current_scope.add_local_definition(name, :argument) if name + + super + end + + def visit_blockarg(node) + name = node.name + current_scope.add_local_definition(name, :argument) if name + + super + end + + def visit_block_var(node) + node.locals.each do |local| + current_scope.add_local_definition(local, :variable) + end + + super + end + alias visit_lambda_var visit_block_var + + # Visit for keeping track of local variable definitions + def visit_var_field(node) + value = node.value + current_scope.add_local_definition(value, :variable) if value.is_a?(Ident) + + super + end + + # Visit for keeping track of local variable definitions + def visit_pinned_var_ref(node) + value = node.value + current_scope.add_local_usage(value, :variable) if value.is_a?(Ident) + + super + end + + # Visits for keeping track of variable and argument usages + def visit_var_ref(node) + value = node.value + + if value.is_a?(Ident) + definition = current_scope.find_local(value.value) + current_scope.add_local_usage(value, definition.type) if definition + end + + super + end + + # When using regex named capture groups, vcalls might actually be a variable + def visit_vcall(node) + value = node.value + definition = current_scope.find_local(value.value) + current_scope.add_local_usage(value, definition.type) if definition + + super + end + + # Visit for capturing local variables defined in regex named capture groups + def visit_binary(node) + if node.operator == :=~ + left = node.left + + if 
left.is_a?(RegexpLiteral) && left.parts.length == 1 && + left.parts.first.is_a?(TStringContent) + content = left.parts.first + + value = content.value + location = content.location + start_line = location.start_line + + Regexp + .new(value, Regexp::FIXEDENCODING) + .names + .each do |name| + offset = value.index(/\(\?<#{Regexp.escape(name)}>/) + line = start_line + value[0...offset].count("\n") + + # We need to add 3 to account for these three characters + # prefixing a named capture (?< + column = location.start_column + offset + 3 + if value[0...offset].include?("\n") + column = + value[0...offset].length - value[0...offset].rindex("\n") + + 3 - 1 + end + + ident_location = + Location.new( + start_line: line, + start_char: location.start_char + offset, + start_column: column, + end_line: line, + end_char: location.start_char + offset + name.length, + end_column: column + name.length + ) + + identifier = Ident.new(value: name, location: ident_location) + current_scope.add_local_definition(identifier, :variable) + end + end + end + + super + end + + private + + def add_argument_definitions(list) + list.each do |param| + case param + when ArgStar + value = param.value + current_scope.add_local_definition(value, :argument) if value + when MLHSParen + add_argument_definitions(param.contents.parts) + else + current_scope.add_local_definition(param, :argument) + end + end + end + + def next_scope_id + @next_scope_id += 1 + end + + def with_scope(parent_scope = nil) + previous_scope = @current_scope + @current_scope = Scope.new(next_scope_id, parent_scope) + yield + ensure + @current_scope = previous_scope + end + end +end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb deleted file mode 100644 index 97592d4d..00000000 --- a/lib/syntax_tree/yarv.rb +++ /dev/null @@ -1,287 +0,0 @@ -# frozen_string_literal: true - -require "forwardable" - -module SyntaxTree - # This module provides an object representation of the YARV bytecode. 
- module YARV - class VM - class Jump - attr_reader :name - - def initialize(name) - @name = name - end - end - - class Leave - attr_reader :value - - def initialize(value) - @value = value - end - end - - class Frame - attr_reader :iseq, :parent, :stack_index, :_self, :nesting, :svars - - def initialize(iseq, parent, stack_index, _self, nesting) - @iseq = iseq - @parent = parent - @stack_index = stack_index - @_self = _self - @nesting = nesting - @svars = {} - end - end - - class TopFrame < Frame - def initialize(iseq) - super(iseq, nil, 0, TOPLEVEL_BINDING.eval("self"), [Object]) - end - end - - class BlockFrame < Frame - def initialize(iseq, parent, stack_index) - super(iseq, parent, stack_index, parent._self, parent.nesting) - end - end - - class MethodFrame < Frame - attr_reader :name, :block - - def initialize(iseq, parent, stack_index, _self, name, block) - super(iseq, parent, stack_index, _self, parent.nesting) - @name = name - @block = block - end - end - - class ClassFrame < Frame - def initialize(iseq, parent, stack_index, _self) - super(iseq, parent, stack_index, _self, parent.nesting + [_self]) - end - end - - class FrozenCore - define_method("core#hash_merge_kwd") { |left, right| left.merge(right) } - - define_method("core#hash_merge_ptr") do |hash, *values| - hash.merge(values.each_slice(2).to_h) - end - - define_method("core#set_method_alias") do |clazz, new_name, old_name| - clazz.alias_method(new_name, old_name) - end - - define_method("core#set_variable_alias") do |new_name, old_name| - # Using eval here since there isn't a reflection API to be able to - # alias global variables. 
- eval("alias #{new_name} #{old_name}", binding, __FILE__, __LINE__) - end - - define_method("core#set_postexe") { |&block| END { block.call } } - - define_method("core#undef_method") do |clazz, name| - clazz.undef_method(name) - end - end - - FROZEN_CORE = FrozenCore.new.freeze - - extend Forwardable - - attr_reader :stack - def_delegators :stack, :push, :pop - - attr_reader :frame - def_delegators :frame, :_self - - def initialize - @stack = [] - @frame = nil - end - - ########################################################################## - # Helper methods for frames - ########################################################################## - - def run_frame(frame) - # First, set the current frame to the given value. - @frame = frame - - # Next, set up the local table for the frame. This is actually incorrect - # as it could use the values already on the stack, but for now we're - # just doing this for simplicity. - frame.iseq.local_table.size.times { push(nil) } - - # Yield so that some frame-specific setup can be done. - yield if block_given? - - # This hash is going to hold a mapping of label names to their - # respective indices in our instruction list. - labels = {} - - # This array is going to hold our instructions. - insns = [] - - # Here we're going to preprocess the instruction list from the - # instruction sequence to set up the labels hash and the insns array. - frame.iseq.insns.each do |insn| - case insn - when Integer, Symbol - # skip - when InstructionSequence::Label - labels[insn.name] = insns.length - else - insns << insn - end - end - - # Finally we can execute the instructions one at a time. If they return - # jumps or leaves we will handle those appropriately. 
- pc = 0 - while pc < insns.length - insn = insns[pc] - pc += 1 - - case (result = insn.call(self)) - when Jump - pc = labels[result.name] - when Leave - return result.value - end - end - ensure - @stack = stack[0...frame.stack_index] - @frame = frame.parent - end - - def run_top_frame(iseq) - run_frame(TopFrame.new(iseq)) - end - - def run_block_frame(iseq, *args, &block) - run_frame(BlockFrame.new(iseq, frame, stack.length)) do - locals = [*args, block] - iseq.local_table.size.times do |index| - local_set(index, 0, locals.shift) - end - end - end - - def run_class_frame(iseq, clazz) - run_frame(ClassFrame.new(iseq, frame, stack.length, clazz)) - end - - def run_method_frame(name, iseq, _self, *args, **kwargs, &block) - run_frame( - MethodFrame.new(iseq, frame, stack.length, _self, name, block) - ) do - locals = [*args, block] - - if iseq.argument_options[:keyword] - # First, set up the keyword bits array. - keyword_bits = - iseq.argument_options[:keyword].map do |config| - kwargs.key?(config.is_a?(Array) ? config[0] : config) - end - - iseq.local_table.locals.each_with_index do |local, index| - # If this is the keyword bits local, then set it appropriately. - if local.name == 2 - locals.insert(index, keyword_bits) - next - end - - # First, find the configuration for this local in the keywords - # list if it exists. - name = local.name - config = - iseq.argument_options[:keyword].find do |keyword| - keyword.is_a?(Array) ? keyword[0] == name : keyword == name - end - - # If the configuration doesn't exist, then the local is not a - # keyword local. - next unless config - - if !config.is_a?(Array) - # required keyword - locals.insert(index, kwargs.fetch(name)) - elsif !config[1].nil? 
- # optional keyword with embedded default value - locals.insert(index, kwargs.fetch(name, config[1])) - else - # optional keyword with expression default value - locals.insert(index, nil) - end - end - end - - iseq.local_table.size.times do |index| - local_set(index, 0, locals.shift) - end - end - end - - ########################################################################## - # Helper methods for instructions - ########################################################################## - - def const_base - frame.nesting.last - end - - def frame_at(level) - current = frame - level.times { current = current.parent } - current - end - - def frame_svar - current = frame - current = current.parent while current.is_a?(BlockFrame) - current - end - - def frame_yield - current = frame - current = current.parent until current.is_a?(MethodFrame) - current - end - - def frozen_core - FROZEN_CORE - end - - def jump(label) - Jump.new(label.name) - end - - def leave - Leave.new(pop) - end - - def local_get(index, level) - stack[frame_at(level).stack_index + index] - end - - def local_set(index, level, value) - stack[frame_at(level).stack_index + index] = value - end - end - - # Compile the given source into a YARV instruction sequence. - def self.compile(source, options = Compiler::Options.new) - SyntaxTree.parse(source).accept(Compiler.new(options)) - end - - # Compile and interpret the given source. 
- def self.interpret(source, options = Compiler::Options.new) - iseq = RubyVM::InstructionSequence.compile(source, **options) - iseq = InstructionSequence.from(iseq.to_a) - VM.new.run_top_frame(iseq) - end - end -end diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb deleted file mode 100644 index efb179c1..00000000 --- a/lib/syntax_tree/yarv/assembler.rb +++ /dev/null @@ -1,459 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - class Assembler - class ObjectVisitor < Compiler::RubyVisitor - def visit_dyna_symbol(node) - if node.parts.empty? - :"" - else - raise CompilationError - end - end - - def visit_string_literal(node) - case node.parts.length - when 0 - "" - when 1 - raise CompilationError unless node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - raise CompilationError - end - end - end - - CALLDATA_FLAGS = { - "ARGS_SPLAT" => CallData::CALL_ARGS_SPLAT, - "ARGS_BLOCKARG" => CallData::CALL_ARGS_BLOCKARG, - "FCALL" => CallData::CALL_FCALL, - "VCALL" => CallData::CALL_VCALL, - "ARGS_SIMPLE" => CallData::CALL_ARGS_SIMPLE, - "BLOCKISEQ" => CallData::CALL_BLOCKISEQ, - "KWARG" => CallData::CALL_KWARG, - "KW_SPLAT" => CallData::CALL_KW_SPLAT, - "TAILCALL" => CallData::CALL_TAILCALL, - "SUPER" => CallData::CALL_SUPER, - "ZSUPER" => CallData::CALL_ZSUPER, - "OPT_SEND" => CallData::CALL_OPT_SEND, - "KW_SPLAT_MUT" => CallData::CALL_KW_SPLAT_MUT - }.freeze - - DEFINED_TYPES = [ - nil, - "nil", - "instance-variable", - "local-variable", - "global-variable", - "class variable", - "constant", - "method", - "yield", - "super", - "self", - "true", - "false", - "assignment", - "expression", - "ref", - "func", - "constant-from" - ].freeze - - attr_reader :filepath - - def initialize(filepath) - @filepath = filepath - end - - def assemble - iseq = InstructionSequence.new(:top, "
", nil, Location.default) - assemble_iseq(iseq, File.readlines(filepath, chomp: true)) - - iseq.compile! - iseq - end - - def self.assemble(filepath) - new(filepath).assemble - end - - private - - def assemble_iseq(iseq, lines) - labels = Hash.new { |hash, name| hash[name] = iseq.label } - line_index = 0 - - while line_index < lines.length - line = lines[line_index] - line_index += 1 - - case line.strip - when "", /^;/ - # skip over blank lines and comments - next - when /^(\w+):$/ - # create labels - iseq.push(labels[$1]) - next - when /^__END__/ - # skip over the rest of the file when we hit __END__ - return - end - - insn, operands = line.split(" ", 2) - - case insn - when "adjuststack" - iseq.adjuststack(parse_number(operands)) - when "anytostring" - iseq.anytostring - when "branchif" - iseq.branchif(labels[operands]) - when "branchnil" - iseq.branchnil(labels[operands]) - when "branchunless" - iseq.branchunless(labels[operands]) - when "checkkeyword" - kwbits_index, keyword_index = operands.split(/,\s*/) - iseq.checkkeyword( - parse_number(kwbits_index), - parse_number(keyword_index) - ) - when "checkmatch" - iseq.checkmatch(parse_number(operands)) - when "checktype" - iseq.checktype(parse_number(operands)) - when "concatarray" - iseq.concatarray - when "concatstrings" - iseq.concatstrings(parse_number(operands)) - when "defineclass" - body = parse_nested(lines[line_index..]) - line_index += body.length - - name_value, flags_value = operands.split(/,\s*/) - name = parse_symbol(name_value) - flags = parse_number(flags_value) - - class_iseq = iseq.class_child_iseq(name.to_s, Location.default) - assemble_iseq(class_iseq, body) - iseq.defineclass(name, class_iseq, flags) - when "defined" - type, object, message = operands.split(/,\s*/) - iseq.defined( - DEFINED_TYPES.index(type), - parse_symbol(object), - parse_string(message) - ) - when "definemethod" - body = parse_nested(lines[line_index..]) - line_index += body.length - - name = parse_symbol(operands) - 
method_iseq = iseq.method_child_iseq(name.to_s, Location.default) - assemble_iseq(method_iseq, body) - - iseq.definemethod(name, method_iseq) - when "definesmethod" - body = parse_nested(lines[line_index..]) - line_index += body.length - - name = parse_symbol(operands) - method_iseq = iseq.method_child_iseq(name.to_s, Location.default) - - assemble_iseq(method_iseq, body) - iseq.definesmethod(name, method_iseq) - when "dup" - iseq.dup - when "dupn" - iseq.dupn(parse_number(operands)) - when "duparray" - iseq.duparray(parse_type(operands, Array)) - when "duphash" - iseq.duphash(parse_type(operands, Hash)) - when "expandarray" - number, flags = operands.split(/,\s*/) - iseq.expandarray(parse_number(number), parse_number(flags)) - when "getblockparam" - lookup = find_local(iseq, operands) - iseq.getblockparam(lookup.index, lookup.level) - when "getblockparamproxy" - lookup = find_local(iseq, operands) - iseq.getblockparamproxy(lookup.index, lookup.level) - when "getclassvariable" - iseq.getclassvariable(parse_symbol(operands)) - when "getconstant" - iseq.getconstant(parse_symbol(operands)) - when "getglobal" - iseq.getglobal(parse_symbol(operands)) - when "getinstancevariable" - iseq.getinstancevariable(parse_symbol(operands)) - when "getlocal" - lookup = find_local(iseq, operands) - iseq.getlocal(lookup.index, lookup.level) - when "getspecial" - key, type = operands.split(/,\s*/) - iseq.getspecial(parse_number(key), parse_number(type)) - when "intern" - iseq.intern - when "invokeblock" - iseq.invokeblock( - operands ? 
parse_calldata(operands) : YARV.calldata(nil, 0) - ) - when "invokesuper" - calldata = - if operands - parse_calldata(operands) - else - YARV.calldata( - nil, - 0, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | - CallData::CALL_SUPER - ) - end - - block_iseq = - if lines[line_index].start_with?(" ") - body = parse_nested(lines[line_index..]) - line_index += body.length - - block_iseq = iseq.block_child_iseq(Location.default) - assemble_iseq(block_iseq, body) - block_iseq - end - - iseq.invokesuper(calldata, block_iseq) - when "jump" - iseq.jump(labels[operands]) - when "leave" - iseq.leave - when "newarray" - iseq.newarray(parse_number(operands)) - when "newarraykwsplat" - iseq.newarraykwsplat(parse_number(operands)) - when "newhash" - iseq.newhash(parse_number(operands)) - when "newrange" - iseq.newrange(parse_options(operands, [0, 1])) - when "nop" - iseq.nop - when "objtostring" - iseq.objtostring(YARV.calldata(:to_s)) - when "once" - block_iseq = - if lines[line_index].start_with?(" ") - body = parse_nested(lines[line_index..]) - line_index += body.length - - block_iseq = iseq.block_child_iseq(Location.default) - assemble_iseq(block_iseq, body) - block_iseq - end - - iseq.once(block_iseq, iseq.inline_storage) - when "opt_and" - iseq.send(YARV.calldata(:&, 1)) - when "opt_aref" - iseq.send(YARV.calldata(:[], 1)) - when "opt_aref_with" - iseq.opt_aref_with(parse_string(operands), YARV.calldata(:[], 1)) - when "opt_aset" - iseq.send(YARV.calldata(:[]=, 2)) - when "opt_aset_with" - iseq.opt_aset_with(parse_string(operands), YARV.calldata(:[]=, 2)) - when "opt_case_dispatch" - cdhash_value, else_label_value = operands.split(/\s*\},\s*/) - cdhash_value.sub!(/\A\{/, "") - - pairs = - cdhash_value - .split(/\s*,\s*/) - .map! 
{ |pair| pair.split(/\s*=>\s*/) } - - cdhash = pairs.to_h { |value, nm| [parse(value), labels[nm]] } - else_label = labels[else_label_value] - - iseq.opt_case_dispatch(cdhash, else_label) - when "opt_div" - iseq.send(YARV.calldata(:/, 1)) - when "opt_empty_p" - iseq.send(YARV.calldata(:empty?)) - when "opt_eq" - iseq.send(YARV.calldata(:==, 1)) - when "opt_ge" - iseq.send(YARV.calldata(:>=, 1)) - when "opt_gt" - iseq.send(YARV.calldata(:>, 1)) - when "opt_getconstant_path" - iseq.opt_getconstant_path(parse_type(operands, Array)) - when "opt_le" - iseq.send(YARV.calldata(:<=, 1)) - when "opt_length" - iseq.send(YARV.calldata(:length)) - when "opt_lt" - iseq.send(YARV.calldata(:<, 1)) - when "opt_ltlt" - iseq.send(YARV.calldata(:<<, 1)) - when "opt_minus" - iseq.send(YARV.calldata(:-, 1)) - when "opt_mod" - iseq.send(YARV.calldata(:%, 1)) - when "opt_mult" - iseq.send(YARV.calldata(:*, 1)) - when "opt_neq" - iseq.send(YARV.calldata(:!=, 1)) - when "opt_newarray_max" - iseq.newarray(parse_number(operands)) - iseq.send(YARV.calldata(:max)) - when "opt_newarray_min" - iseq.newarray(parse_number(operands)) - iseq.send(YARV.calldata(:min)) - when "opt_nil_p" - iseq.send(YARV.calldata(:nil?)) - when "opt_not" - iseq.send(YARV.calldata(:!)) - when "opt_or" - iseq.send(YARV.calldata(:|, 1)) - when "opt_plus" - iseq.send(YARV.calldata(:+, 1)) - when "opt_regexpmatch2" - iseq.send(YARV.calldata(:=~, 1)) - when "opt_reverse" - iseq.send(YARV.calldata(:reverse)) - when "opt_send_without_block" - iseq.send(parse_calldata(operands)) - when "opt_size" - iseq.send(YARV.calldata(:size)) - when "opt_str_freeze" - iseq.putstring(parse_string(operands)) - iseq.send(YARV.calldata(:freeze)) - when "opt_str_uminus" - iseq.putstring(parse_string(operands)) - iseq.send(YARV.calldata(:-@)) - when "opt_succ" - iseq.send(YARV.calldata(:succ)) - when "pop" - iseq.pop - when "putnil" - iseq.putnil - when "putobject" - iseq.putobject(parse(operands)) - when "putself" - iseq.putself - when 
"putspecialobject" - iseq.putspecialobject(parse_options(operands, [1, 2, 3])) - when "putstring" - iseq.putstring(parse_string(operands)) - when "send" - block_iseq = - if lines[line_index].start_with?(" ") - body = parse_nested(lines[line_index..]) - line_index += body.length - - block_iseq = iseq.block_child_iseq(Location.default) - assemble_iseq(block_iseq, body) - block_iseq - end - - iseq.send(parse_calldata(operands), block_iseq) - when "setblockparam" - lookup = find_local(iseq, operands) - iseq.setblockparam(lookup.index, lookup.level) - when "setconstant" - iseq.setconstant(parse_symbol(operands)) - when "setglobal" - iseq.setglobal(parse_symbol(operands)) - when "setlocal" - lookup = find_local(iseq, operands) - iseq.setlocal(lookup.index, lookup.level) - when "setn" - iseq.setn(parse_number(operands)) - when "setclassvariable" - iseq.setclassvariable(parse_symbol(operands)) - when "setinstancevariable" - iseq.setinstancevariable(parse_symbol(operands)) - when "setspecial" - iseq.setspecial(parse_number(operands)) - when "splatarray" - iseq.splatarray(parse_options(operands, [true, false])) - when "swap" - iseq.swap - when "throw" - iseq.throw(parse_number(operands)) - when "topn" - iseq.topn(parse_number(operands)) - when "toregexp" - options, length = operands.split(", ") - iseq.toregexp(parse_number(options), parse_number(length)) - when "ARG_REQ" - iseq.argument_size += 1 - iseq.local_table.plain(operands.to_sym) - when "ARG_BLOCK" - iseq.argument_options[:block_start] = iseq.argument_size - iseq.local_table.block(operands.to_sym) - iseq.argument_size += 1 - else - raise "Could not understand: #{line}" - end - end - end - - def find_local(iseq, operands) - name_string, level_string = operands.split(/,\s*/) - name = name_string.to_sym - level = level_string&.to_i || 0 - - iseq.local_table.plain(name) - iseq.local_table.find(name, level) - end - - def parse(value) - program = SyntaxTree.parse(value) - raise if program.statements.body.length != 1 - - 
program.statements.body.first.accept(ObjectVisitor.new) - end - - def parse_options(value, options) - parse(value).tap { raise unless options.include?(_1) } - end - - def parse_type(value, type) - parse(value).tap { raise unless _1.is_a?(type) } - end - - def parse_number(value) - parse_type(value, Integer) - end - - def parse_string(value) - parse_type(value, String) - end - - def parse_symbol(value) - parse_type(value, Symbol) - end - - def parse_nested(lines) - body = lines.take_while { |line| line.match?(/^($|;| )/) } - body.map! { |line| line.delete_prefix!(" ") || +"" } - end - - def parse_calldata(value) - message, argc_value, flags_value = value.split - flags = - if flags_value - flags_value.split("|").map(&CALLDATA_FLAGS).inject(:|) - else - CallData::CALL_ARGS_SIMPLE - end - - YARV.calldata(message.to_sym, argc_value&.to_i || 0, flags) - end - end - end -end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb deleted file mode 100644 index f642fb2f..00000000 --- a/lib/syntax_tree/yarv/bf.rb +++ /dev/null @@ -1,179 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # Parses the given source code into a syntax tree, compiles that syntax tree - # into YARV bytecode. - class Bf - attr_reader :source - - def initialize(source) - @source = source - end - - def compile - # Set up the top-level instruction sequence that will be returned. - iseq = InstructionSequence.new(:top, "", nil, location) - - # Set up the $tape global variable that will hold our state. - iseq.duphash({ 0 => 0 }) - iseq.setglobal(:$tape) - iseq.getglobal(:$tape) - iseq.putobject(0) - iseq.send(YARV.calldata(:default=, 1)) - - # Set up the $cursor global variable that will hold the current position - # in the tape. - iseq.putobject(0) - iseq.setglobal(:$cursor) - - stack = [] - source - .each_char - .chunk do |char| - # For each character, we're going to assign a type to it. 
This - # allows a couple of optimizations to be made by combining multiple - # instructions into single instructions, e.g., +++ becomes a single - # change_by(3) instruction. - case char - when "+", "-" - :change - when ">", "<" - :shift - when "." - :output - when "," - :input - when "[", "]" - :loop - else - :ignored - end - end - .each do |type, chunk| - # For each chunk, we're going to emit the appropriate instruction. - case type - when :change - change_by(iseq, chunk.count("+") - chunk.count("-")) - when :shift - shift_by(iseq, chunk.count(">") - chunk.count("<")) - when :output - chunk.length.times { output_char(iseq) } - when :input - chunk.length.times { input_char(iseq) } - when :loop - chunk.each do |char| - case char - when "[" - stack << loop_start(iseq) - when "]" - loop_end(iseq, *stack.pop) - end - end - end - end - - iseq.leave - iseq.compile! - iseq - end - - private - - # This is the location of the top instruction sequence, derived from the - # source string. - def location - Location.new( - start_line: 1, - start_char: 0, - start_column: 0, - end_line: source.count("\n") + 1, - end_char: source.size, - end_column: source.size - (source.rindex("\n") || 0) - 1 - ) - end - - # $tape[$cursor] += value - def change_by(iseq, value) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(YARV.calldata(:[], 1)) - - if value < 0 - iseq.putobject(-value) - iseq.send(YARV.calldata(:-, 1)) - else - iseq.putobject(value) - iseq.send(YARV.calldata(:+, 1)) - end - - iseq.send(YARV.calldata(:[]=, 2)) - end - - # $cursor += value - def shift_by(iseq, value) - iseq.getglobal(:$cursor) - - if value < 0 - iseq.putobject(-value) - iseq.send(YARV.calldata(:-, 1)) - else - iseq.putobject(value) - iseq.send(YARV.calldata(:+, 1)) - end - - iseq.setglobal(:$cursor) - end - - # $stdout.putc($tape[$cursor].chr) - def output_char(iseq) - iseq.getglobal(:$stdout) - - iseq.getglobal(:$tape) - 
iseq.getglobal(:$cursor) - iseq.send(YARV.calldata(:[], 1)) - iseq.send(YARV.calldata(:chr)) - - iseq.send(YARV.calldata(:putc, 1)) - end - - # $tape[$cursor] = $stdin.getc.ord - def input_char(iseq) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - - iseq.getglobal(:$stdin) - iseq.send(YARV.calldata(:getc)) - iseq.send(YARV.calldata(:ord)) - - iseq.send(YARV.calldata(:[]=, 2)) - end - - # unless $tape[$cursor] == 0 - def loop_start(iseq) - start_label = iseq.label - end_label = iseq.label - - iseq.push(start_label) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(YARV.calldata(:[], 1)) - - iseq.putobject(0) - iseq.send(YARV.calldata(:==, 1)) - iseq.branchunless(end_label) - - [start_label, end_label] - end - - # Jump back to the start of the loop. - def loop_end(iseq, start_label, end_label) - iseq.jump(start_label) - iseq.push(end_label) - end - end - end -end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb deleted file mode 100644 index 4af5d6f0..00000000 --- a/lib/syntax_tree/yarv/compiler.rb +++ /dev/null @@ -1,2287 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This class is an experiment in transforming Syntax Tree nodes into their - # corresponding YARV instruction sequences. It attempts to mirror the - # behavior of RubyVM::InstructionSequence.compile. - # - # You use this as with any other visitor. First you parse code into a tree, - # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. - # With that object you can call #to_a on it, which will return a serialized - # form of the instruction sequence as an array. This array _should_ mirror - # the array given by RubyVM::InstructionSequence#to_a. 
- # - # As an example, here is how you would compile a single expression: - # - # program = SyntaxTree.parse("1 + 2") - # program.accept(SyntaxTree::YARV::Compiler.new).to_a - # - # [ - # "YARVInstructionSequence/SimpleDataFormat", - # 3, - # 1, - # 1, - # {:arg_size=>0, :local_size=>0, :stack_max=>2}, - # "", - # "", - # "", - # 1, - # :top, - # [], - # {}, - # [], - # [ - # [:putobject_INT2FIX_1_], - # [:putobject, 2], - # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], - # [:leave] - # ] - # ] - # - # Note that this is the same output as calling: - # - # RubyVM::InstructionSequence.compile("1 + 2").to_a - # - class Compiler < BasicVisitor - # This represents a set of options that can be passed to the compiler to - # control how it compiles the code. It mirrors the options that can be - # passed to RubyVM::InstructionSequence.compile, except it only includes - # options that actually change the behavior. - class Options - def initialize( - frozen_string_literal: false, - inline_const_cache: true, - operands_unification: true, - peephole_optimization: true, - specialized_instruction: true, - tailcall_optimization: false - ) - @frozen_string_literal = frozen_string_literal - @inline_const_cache = inline_const_cache - @operands_unification = operands_unification - @peephole_optimization = peephole_optimization - @specialized_instruction = specialized_instruction - @tailcall_optimization = tailcall_optimization - end - - def to_hash - { - frozen_string_literal: @frozen_string_literal, - inline_const_cache: @inline_const_cache, - operands_unification: @operands_unification, - peephole_optimization: @peephole_optimization, - specialized_instruction: @specialized_instruction, - tailcall_optimization: @tailcall_optimization - } - end - - def frozen_string_literal! - @frozen_string_literal = true - end - - def frozen_string_literal? - @frozen_string_literal - end - - def inline_const_cache? - @inline_const_cache - end - - def operands_unification? 
- @operands_unification - end - - def peephole_optimization? - @peephole_optimization - end - - def specialized_instruction? - @specialized_instruction - end - - def tailcall_optimization? - @tailcall_optimization - end - end - - # This visitor is responsible for converting Syntax Tree nodes into their - # corresponding Ruby structures. This is used to convert the operands of - # some instructions like putobject that push a Ruby object directly onto - # the stack. It is only used when the entire structure can be represented - # at compile-time, as opposed to constructed at run-time. - class RubyVisitor < BasicVisitor - # This error is raised whenever a node cannot be converted into a Ruby - # object at compile-time. - class CompilationError < StandardError - end - - # This will attempt to compile the given node. If it's possible, then - # it will return the compiled object. Otherwise it will return nil. - def self.compile(node) - node.accept(new) - rescue CompilationError - end - - def visit_array(node) - node.contents ? visit_all(node.contents.parts) : [] - end - - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. 
- raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] - end - end - - def visit_float(node) - node.value.to_f - end - - alias visit_hash visit_bare_assoc_hash - - def visit_imaginary(node) - node.value.to_c - end - - def visit_int(node) - case (value = node.value) - when /^0b/ - value[2..].to_i(2) - when /^0o/ - value[2..].to_i(8) - when /^0d/ - value[2..].to_i - when /^0x/ - value[2..].to_i(16) - else - value.to_i - end - end - - def visit_label(node) - node.value.chomp(":").to_sym - end - - def visit_mrhs(node) - visit_all(node.parts) - end - - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_qwords(node) - visit_all(node.elements) - end - - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end - - def visit_rational(node) - node.value.to_r - end - - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError - end - end - - # This isn't actually a visit method, though maybe it should be. It is - # responsible for converting the set of string options on a regular - # expression into its equivalent integer. 
- def visit_regexp_literal_flags(node) - node - .options - .chars - .inject(0) do |accum, option| - accum | - case option - when "i" - Regexp::IGNORECASE - when "x" - Regexp::EXTENDED - when "m" - Regexp::MULTILINE - else - raise "Unknown regexp option: #{option}" - end - end - end - - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_var_ref(node) - raise CompilationError unless node.value.is_a?(Kw) - - case node.value.value - when "nil" - nil - when "true" - true - when "false" - false - else - raise CompilationError - end - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - - def visit_unsupported(_node) - raise CompilationError - end - - # Please forgive the metaprogramming here. This is used to create visit - # methods for every node that we did not explicitly handle. By default - # each of these methods will raise a CompilationError. - handled = instance_methods(false) - (Visitor.instance_methods(false) - handled).each do |method| - alias_method method, :visit_unsupported - end - end - - # These options mirror the compilation options that we currently support - # that can be also passed to RubyVM::InstructionSequence.compile. - attr_reader :options - - # The current instruction sequence that is being compiled. - attr_reader :iseq - - # A boolean to track if we're currently compiling the last statement - # within a set of statements. This information is necessary to determine - # if we need to return the value of the last statement. 
- attr_reader :last_statement - - def initialize(options) - @options = options - @iseq = nil - @last_statement = false - end - - def visit_BEGIN(node) - visit(node.statements) - end - - def visit_CHAR(node) - if options.frozen_string_literal? - iseq.putobject(node.value[1..]) - else - iseq.putstring(node.value[1..]) - end - end - - def visit_END(node) - once_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do - postexe_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_B_CALL) - - *statements, last_statement = node.statements.body - visit_all(statements) - with_last_statement { visit(last_statement) } - - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.send( - YARV.calldata(:"core#set_postexe", 0, CallData::CALL_FCALL), - postexe_iseq - ) - iseq.leave - end - - iseq.once(once_iseq, iseq.inline_storage) - iseq.pop - end - - def visit_alias(node) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE) - visit(node.left) - visit(node.right) - iseq.send(YARV.calldata(:"core#set_method_alias", 3)) - end - - def visit_aref(node) - calldata = YARV.calldata(:[], 1) - visit(node.collection) - - if !options.frozen_string_literal? && - options.specialized_instruction? 
&& (node.index.parts.length == 1) - arg = node.index.parts.first - - if arg.is_a?(StringLiteral) && (arg.parts.length == 1) - string_part = arg.parts.first - - if string_part.is_a?(TStringContent) - iseq.opt_aref_with(string_part.value, calldata) - return - end - end - end - - visit(node.index) - iseq.send(calldata) - end - - def visit_arg_block(node) - visit(node.value) - end - - def visit_arg_paren(node) - visit(node.arguments) - end - - def visit_arg_star(node) - visit(node.value) - iseq.splatarray(false) - end - - def visit_args(node) - visit_all(node.parts) - end - - def visit_array(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - elsif node.contents && node.contents.parts.length == 1 && - node.contents.parts.first.is_a?(BareAssocHash) && - node.contents.parts.first.assocs.length == 1 && - node.contents.parts.first.assocs.first.is_a?(AssocSplat) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.newhash(0) - visit(node.contents.parts.first) - iseq.send(YARV.calldata(:"core#hash_merge_kwd", 2)) - iseq.newarraykwsplat(1) - else - length = 0 - - node.contents.parts.each do |part| - if part.is_a?(ArgStar) - if length > 0 - iseq.newarray(length) - length = 0 - end - - visit(part.value) - iseq.concatarray - else - visit(part) - length += 1 - end - end - - iseq.newarray(length) if length > 0 - iseq.concatarray if length > 0 && length != node.contents.parts.length - end - end - - def visit_aryptn(node) - end - - def visit_assign(node) - case node.target - when ARefField - calldata = YARV.calldata(:[]=, 2) - - if !options.frozen_string_literal? && - options.specialized_instruction? 
&& - (node.target.index.parts.length == 1) - arg = node.target.index.parts.first - - if arg.is_a?(StringLiteral) && (arg.parts.length == 1) - string_part = arg.parts.first - - if string_part.is_a?(TStringContent) - visit(node.target.collection) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.opt_aset_with(string_part.value, calldata) - iseq.pop - return - end - end - end - - iseq.putnil - visit(node.target.collection) - visit(node.target.index) - visit(node.value) - iseq.setn(3) - iseq.send(calldata) - iseq.pop - when ConstPathField - names = constant_names(node.target) - name = names.pop - - if RUBY_VERSION >= "3.2" - iseq.opt_getconstant_path(names) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.swap - iseq.setconstant(name) - else - visit(node.value) - iseq.dup if last_statement? - iseq.opt_getconstant_path(names) - iseq.setconstant(name) - end - when Field - iseq.putnil - visit(node.target) - visit(node.value) - iseq.setn(2) - iseq.send(YARV.calldata(:"#{node.target.name.value}=", 1)) - iseq.pop - when TopConstField - name = node.target.constant.value.to_sym - - if RUBY_VERSION >= "3.2" - iseq.putobject(Object) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.swap - iseq.setconstant(name) - else - visit(node.value) - iseq.dup if last_statement? - iseq.putobject(Object) - iseq.setconstant(name) - end - when VarField - visit(node.value) - iseq.dup if last_statement? 
- - case node.target.value - when Const - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - iseq.setconstant(node.target.value.value.to_sym) - when CVar - iseq.setclassvariable(node.target.value.value.to_sym) - when GVar - iseq.setglobal(node.target.value.value.to_sym) - when Ident - lookup = visit(node.target) - - if lookup.local.is_a?(LocalTable::BlockLocal) - iseq.setblockparam(lookup.index, lookup.level) - else - iseq.setlocal(lookup.index, lookup.level) - end - when IVar - iseq.setinstancevariable(node.target.value.value.to_sym) - end - end - end - - def visit_assoc(node) - visit(node.key) - visit(node.value) - end - - def visit_assoc_splat(node) - visit(node.value) - end - - def visit_backref(node) - iseq.getspecial(GetSpecial::SVAR_BACKREF, node.value[1..].to_i << 1) - end - - def visit_bare_assoc_hash(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duphash(compiled) - else - visit_all(node.assocs) - end - end - - def visit_begin(node) - end - - def visit_binary(node) - case node.operator - when :"&&" - done_label = iseq.label - - visit(node.left) - iseq.dup - iseq.branchunless(done_label) - - iseq.pop - visit(node.right) - iseq.push(done_label) - when :"||" - visit(node.left) - iseq.dup - - skip_right_label = iseq.label - iseq.branchif(skip_right_label) - iseq.pop - - visit(node.right) - iseq.push(skip_right_label) - else - visit(node.left) - visit(node.right) - iseq.send(YARV.calldata(node.operator, 1)) - end - end - - def visit_block(node) - with_child_iseq(iseq.block_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_B_CALL) - visit(node.block_var) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - end - - def visit_block_var(node) - params = node.params - - if params.requireds.length == 1 && params.optionals.empty? && - !params.rest && params.posts.empty? && params.keywords.empty? 
&& - !params.keyword_rest && !params.block - iseq.argument_options[:ambiguous_param0] = true - end - - visit(node.params) - - node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) } - end - - def visit_blockarg(node) - iseq.argument_options[:block_start] = iseq.argument_size - iseq.local_table.block(node.name.value.to_sym) - iseq.argument_size += 1 - end - - def visit_bodystmt(node) - visit(node.statements) - end - - def visit_break(node) - end - - def visit_call(node) - if node.is_a?(CallNode) - return( - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: nil, - location: node.location - ) - ) - ) - end - - # Track whether or not this is a method call on a block proxy receiver. - # If it is, we can potentially do tailcall optimizations on it. - block_receiver = false - - if node.receiver - if node.receiver.is_a?(VarRef) - lookup = iseq.local_variable(node.receiver.value.value.to_sym) - - if lookup.local.is_a?(LocalTable::BlockLocal) - iseq.getblockparamproxy(lookup.index, lookup.level) - block_receiver = true - else - visit(node.receiver) - end - else - visit(node.receiver) - end - else - iseq.putself - end - - after_call_label = nil - if node.operator&.value == "&." - iseq.dup - after_call_label = iseq.label - iseq.branchnil(after_call_label) - end - - arg_parts = argument_parts(node.arguments) - argc = arg_parts.length - flag = 0 - - arg_parts.each do |arg_part| - case arg_part - when ArgBlock - argc -= 1 - flag |= CallData::CALL_ARGS_BLOCKARG - visit(arg_part) - when ArgStar - flag |= CallData::CALL_ARGS_SPLAT - visit(arg_part) - when ArgsForward - flag |= CallData::CALL_TAILCALL if options.tailcall_optimization? 
- - flag |= CallData::CALL_ARGS_SPLAT - lookup = iseq.local_table.find(:*) - iseq.getlocal(lookup.index, lookup.level) - iseq.splatarray(arg_parts.length != 1) - - flag |= CallData::CALL_ARGS_BLOCKARG - lookup = iseq.local_table.find(:&) - iseq.getblockparamproxy(lookup.index, lookup.level) - when BareAssocHash - flag |= CallData::CALL_KW_SPLAT - visit(arg_part) - else - visit(arg_part) - end - end - - block_iseq = visit(node.block) if node.block - - # If there's no block and we don't already have any special flags set, - # then we can safely call this simple arguments. Note that has to be the - # first flag we set after looking at the arguments to get the flags - # correct. - flag |= CallData::CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - - # If there's no receiver, then this is an "fcall". - flag |= CallData::CALL_FCALL if node.receiver.nil? - - # If we're calling a method on the passed block object and we have - # tailcall optimizations turned on, then we can set the tailcall flag. - if block_receiver && options.tailcall_optimization? - flag |= CallData::CALL_TAILCALL - end - - iseq.send( - YARV.calldata(node.message.value.to_sym, argc, flag), - block_iseq - ) - iseq.event(after_call_label) if after_call_label - end - - def visit_case(node) - visit(node.value) if node.value - - clauses = [] - else_clause = nil - current = node.consequent - - while current - clauses << current - - if (current = current.consequent).is_a?(Else) - else_clause = current - break - end - end - - branches = - clauses.map do |clause| - visit(clause.arguments) - iseq.topn(1) - iseq.send( - YARV.calldata( - :===, - 1, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE - ) - ) - - label = iseq.label - iseq.branchif(label) - [clause, label] - end - - iseq.pop - else_clause ? 
visit(else_clause) : iseq.putnil - iseq.leave - - branches.each_with_index do |(clause, label), index| - iseq.leave if index != 0 - iseq.push(label) - iseq.pop - visit(clause) - end - end - - def visit_class(node) - name = node.constant.constant.value.to_sym - class_iseq = - with_child_iseq(iseq.class_child_iseq(name, node.location)) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - flags = DefineClass::TYPE_CLASS - - case node.constant - when ConstPathRef - flags |= DefineClass::FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - when TopConstRef - flags |= DefineClass::FLAG_SCOPED - iseq.putobject(Object) - end - - if node.superclass - flags |= DefineClass::FLAG_HAS_SUPERCLASS - visit(node.superclass) - else - iseq.putnil - end - - iseq.defineclass(name, class_iseq, flags) - end - - def visit_command(node) - visit_call( - CommandCall.new( - receiver: nil, - operator: nil, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_command_call(node) - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_const_path_field(node) - visit(node.parent) - end - - def visit_const_path_ref(node) - names = constant_names(node) - iseq.opt_getconstant_path(names) - end - - def visit_def(node) - name = node.name.value.to_sym - method_iseq = iseq.method_child_iseq(name.to_s, node.location) - - with_child_iseq(method_iseq) do - visit(node.params) if node.params - iseq.event(:RUBY_EVENT_CALL) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_RETURN) - iseq.leave - end - - if node.target - visit(node.target) - iseq.definesmethod(name, method_iseq) - else - iseq.definemethod(name, method_iseq) - end - - iseq.putobject(name) - end 
- - def visit_defined(node) - case node.value - when Assign - # If we're assigning to a local variable, then we need to make sure - # that we put it into the local table. - if node.value.target.is_a?(VarField) && - node.value.target.value.is_a?(Ident) - iseq.local_table.plain(node.value.target.value.value.to_sym) - end - - iseq.putobject("assignment") - when VarRef - value = node.value.value - name = value.value.to_sym - - case value - when Const - iseq.putnil - iseq.defined(Defined::TYPE_CONST, name, "constant") - when CVar - iseq.putnil - iseq.defined(Defined::TYPE_CVAR, name, "class variable") - when GVar - iseq.putnil - iseq.defined(Defined::TYPE_GVAR, name, "global-variable") - when Ident - iseq.putobject("local-variable") - when IVar - iseq.putnil - iseq.defined(Defined::TYPE_IVAR, name, "instance-variable") - when Kw - case name - when :false - iseq.putobject("false") - when :nil - iseq.putobject("nil") - when :self - iseq.putobject("self") - when :true - iseq.putobject("true") - end - end - when VCall - iseq.putself - - name = node.value.value.value.to_sym - iseq.defined(Defined::TYPE_FUNC, name, "method") - when YieldNode - iseq.putnil - iseq.defined(Defined::TYPE_YIELD, false, "yield") - when ZSuper - iseq.putnil - iseq.defined(Defined::TYPE_ZSUPER, false, "super") - else - iseq.putobject("expression") - end - end - - def visit_dyna_symbol(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - iseq.putobject(node.parts.first.value.to_sym) - end - end - - def visit_else(node) - visit(node.statements) - iseq.pop unless last_statement? 
- end - - def visit_elsif(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.statements, - consequent: node.consequent, - location: node.location - ) - ) - end - - def visit_ensure(node) - end - - def visit_field(node) - visit(node.parent) - end - - def visit_float(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_fndptn(node) - end - - def visit_for(node) - visit(node.collection) - - name = node.index.value.value.to_sym - iseq.local_table.plain(name) - - block_iseq = - with_child_iseq(iseq.block_child_iseq(node.statements.location)) do - iseq.argument_options[:lead_num] ||= 0 - iseq.argument_options[:lead_num] += 1 - iseq.argument_options[:ambiguous_param0] = true - - iseq.argument_size += 1 - iseq.local_table.plain(2) - - iseq.getlocal(0, 0) - - local_variable = iseq.local_variable(name) - iseq.setlocal(local_variable.index, local_variable.level) - - iseq.event(:RUBY_EVENT_B_CALL) - iseq.nop - - visit(node.statements) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.send(YARV.calldata(:each, 0, 0), block_iseq) - end - - def visit_hash(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duphash(compiled) - else - visit_all(node.assocs) - iseq.newhash(node.assocs.length * 2) - end - end - - def visit_hshptn(node) - end - - def visit_heredoc(node) - if node.beginning.value.end_with?("`") - visit_xstring_literal(node) - elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_if(node) - if node.predicate.is_a?(RangeNode) - true_label = iseq.label - false_label = iseq.label - end_label = iseq.label - - iseq.getspecial(GetSpecial::SVAR_FLIPFLOP_START, 0) - iseq.branchif(true_label) - - visit(node.predicate.left) - iseq.branchunless(end_label) - - iseq.putobject(true) - iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - - iseq.push(true_label) - 
visit(node.predicate.right) - iseq.branchunless(false_label) - - iseq.putobject(false) - iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - - iseq.push(false_label) - visit(node.statements) - iseq.leave - iseq.push(end_label) - iseq.putnil - else - consequent_label = iseq.label - - visit(node.predicate) - iseq.branchunless(consequent_label) - visit(node.statements) - - if last_statement? - iseq.leave - iseq.push(consequent_label) - node.consequent ? visit(node.consequent) : iseq.putnil - else - iseq.pop - - if node.consequent - done_label = iseq.label - iseq.jump(done_label) - iseq.push(consequent_label) - visit(node.consequent) - iseq.push(done_label) - else - iseq.push(consequent_label) - end - end - end - end - - def visit_if_op(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.truthy, - consequent: - Else.new( - keyword: Kw.new(value: "else", location: Location.default), - statements: node.falsy, - location: Location.default - ), - location: Location.default - ) - ) - end - - def visit_imaginary(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_int(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_kwrest_param(node) - iseq.argument_options[:kwrest] = iseq.argument_size - iseq.argument_size += 1 - iseq.local_table.plain(node.name.value.to_sym) - end - - def visit_label(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_lambda(node) - lambda_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_B_CALL) - visit(node.params) - visit(node.statements) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.send(YARV.calldata(:lambda, 0, CallData::CALL_FCALL), lambda_iseq) - end - - def visit_lambda_var(node) - visit_block_var(node) - end - - def visit_massign(node) - visit(node.value) - iseq.dup - visit(node.target) - end - - def visit_method_add_block(node) - visit_call( 
- CommandCall.new( - receiver: node.call.receiver, - operator: node.call.operator, - message: node.call.message, - arguments: node.call.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_mlhs(node) - lookups = [] - node.parts.each do |part| - case part - when VarField - lookups << visit(part) - end - end - - iseq.expandarray(lookups.length, 0) - lookups.each { |lookup| iseq.setlocal(lookup.index, lookup.level) } - end - - def visit_module(node) - name = node.constant.constant.value.to_sym - module_iseq = - with_child_iseq(iseq.module_child_iseq(name, node.location)) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - flags = DefineClass::TYPE_MODULE - - case node.constant - when ConstPathRef - flags |= DefineClass::FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - when TopConstRef - flags |= DefineClass::FLAG_SCOPED - iseq.putobject(Object) - end - - iseq.putnil - iseq.defineclass(name, module_iseq, flags) - end - - def visit_mrhs(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - visit_all(node.parts) - iseq.newarray(node.parts.length) - end - end - - def visit_next(node) - end - - def visit_not(node) - visit(node.statement) - iseq.send(YARV.calldata(:!)) - end - - def visit_opassign(node) - flag = CallData::CALL_ARGS_SIMPLE - if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - flag |= CallData::CALL_FCALL - end - - case (operator = node.operator.value.chomp("=").to_sym) - when :"&&" - done_label = iseq.label - - with_opassign(node) do - iseq.dup - iseq.branchunless(done_label) - iseq.pop - visit(node.value) - end - - case node.target - when ARefField - iseq.leave - iseq.push(done_label) - iseq.setn(3) - iseq.adjuststack(3) - when ConstPathField, TopConstField - iseq.push(done_label) - iseq.swap - iseq.pop - else - iseq.push(done_label) - end - 
when :"||" - if node.target.is_a?(ConstPathField) || - node.target.is_a?(TopConstField) - opassign_defined(node) - iseq.swap - iseq.pop - elsif node.target.is_a?(VarField) && - [Const, CVar, GVar].include?(node.target.value.class) - opassign_defined(node) - else - skip_value_label = iseq.label - - with_opassign(node) do - iseq.dup - iseq.branchif(skip_value_label) - iseq.pop - visit(node.value) - end - - if node.target.is_a?(ARefField) - iseq.leave - iseq.push(skip_value_label) - iseq.setn(3) - iseq.adjuststack(3) - else - iseq.push(skip_value_label) - end - end - else - with_opassign(node) do - visit(node.value) - iseq.send(YARV.calldata(operator, 1, flag)) - end - end - end - - def visit_params(node) - if node.requireds.any? - iseq.argument_options[:lead_num] = 0 - - node.requireds.each do |required| - iseq.local_table.plain(required.value.to_sym) - iseq.argument_size += 1 - iseq.argument_options[:lead_num] += 1 - end - end - - node.optionals.each do |(optional, value)| - index = iseq.local_table.size - name = optional.value.to_sym - - iseq.local_table.plain(name) - iseq.argument_size += 1 - - unless iseq.argument_options.key?(:opt) - start_label = iseq.label - iseq.push(start_label) - iseq.argument_options[:opt] = [start_label] - end - - visit(value) - iseq.setlocal(index, 0) - - arg_given_label = iseq.label - iseq.push(arg_given_label) - iseq.argument_options[:opt] << arg_given_label - end - - visit(node.rest) if node.rest - - if node.posts.any? - iseq.argument_options[:post_start] = iseq.argument_size - iseq.argument_options[:post_num] = 0 - - node.posts.each do |post| - iseq.local_table.plain(post.value.to_sym) - iseq.argument_size += 1 - iseq.argument_options[:post_num] += 1 - end - end - - if node.keywords.any? - iseq.argument_options[:kwbits] = 0 - iseq.argument_options[:keyword] = [] - - keyword_bits_name = node.keyword_rest ? 
3 : 2 - iseq.argument_size += 1 - keyword_bits_index = iseq.local_table.locals.size + node.keywords.size - - node.keywords.each_with_index do |(keyword, value), keyword_index| - name = keyword.value.chomp(":").to_sym - index = iseq.local_table.size - - iseq.local_table.plain(name) - iseq.argument_size += 1 - iseq.argument_options[:kwbits] += 1 - - if value.nil? - iseq.argument_options[:keyword] << name - elsif (compiled = RubyVisitor.compile(value)) - iseq.argument_options[:keyword] << [name, compiled] - else - skip_value_label = iseq.label - - iseq.argument_options[:keyword] << [name] - iseq.checkkeyword(keyword_bits_index, keyword_index) - iseq.branchif(skip_value_label) - visit(value) - iseq.setlocal(index, 0) - iseq.push(skip_value_label) - end - end - - iseq.local_table.plain(keyword_bits_name) - end - - if node.keyword_rest.is_a?(ArgsForward) - if RUBY_VERSION >= "3.2" - iseq.local_table.plain(:*) - iseq.local_table.plain(:&) - iseq.local_table.plain(:"...") - - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_options[:block_start] = iseq.argument_size + 1 - - iseq.argument_size += 2 - else - iseq.local_table.plain(:*) - iseq.local_table.plain(:&) - - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_options[:block_start] = iseq.argument_size + 1 - - iseq.argument_size += 2 - end - elsif node.keyword_rest - visit(node.keyword_rest) - end - - visit(node.block) if node.block - end - - def visit_paren(node) - visit(node.contents) - end - - def visit_pinned_begin(node) - end - - def visit_pinned_var_ref(node) - end - - def visit_program(node) - node.statements.body.each do |statement| - break unless statement.is_a?(Comment) - - if statement.value == "# frozen_string_literal: true" - options.frozen_string_literal! 
- end - end - - preexes = [] - statements = [] - - node.statements.body.each do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - # ignore - when BEGINBlock - preexes << statement - else - statements << statement - end - end - - top_iseq = - InstructionSequence.new( - :top, - "", - nil, - node.location, - options - ) - - with_child_iseq(top_iseq) do - visit_all(preexes) - - if statements.empty? - iseq.putnil - else - *statements, last_statement = statements - visit_all(statements) - with_last_statement { visit(last_statement) } - end - - iseq.leave - end - - top_iseq.compile! - top_iseq - end - - def visit_qsymbols(node) - iseq.duparray(node.accept(RubyVisitor.new)) - end - - def visit_qwords(node) - if options.frozen_string_literal? - iseq.duparray(node.accept(RubyVisitor.new)) - else - visit_all(node.elements) - iseq.newarray(node.elements.length) - end - end - - def visit_range(node) - if (compiled = RubyVisitor.compile(node)) - iseq.putobject(compiled) - else - visit(node.left) - visit(node.right) - iseq.newrange(node.operator.value == ".." ? 0 : 1) - end - end - - def visit_rassign(node) - iseq.putnil - - if node.operator.is_a?(Kw) - match_label = iseq.label - - visit(node.value) - iseq.dup - - visit_pattern(node.pattern, match_label) - - iseq.pop - iseq.pop - iseq.putobject(false) - iseq.leave - - iseq.push(match_label) - iseq.adjuststack(2) - iseq.putobject(true) - else - no_key_label = iseq.label - end_leave_label = iseq.label - end_label = iseq.label - - iseq.putnil - iseq.putobject(false) - iseq.putnil - iseq.putnil - visit(node.value) - iseq.dup - - visit_pattern(node.pattern, end_label) - - # First we're going to push the core onto the stack, then we'll check - # if the value to match is truthy. If it is, we'll jump down to raise - # NoMatchingPatternKeyError. Otherwise we'll raise - # NoMatchingPatternError. 
- iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.topn(4) - iseq.branchif(no_key_label) - - # Here we're going to raise NoMatchingPatternError. - iseq.putobject(NoMatchingPatternError) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putobject("%p: %s") - iseq.topn(4) - iseq.topn(7) - iseq.send(YARV.calldata(:"core#sprintf", 3)) - iseq.send(YARV.calldata(:"core#raise", 2)) - iseq.jump(end_leave_label) - - # Here we're going to raise NoMatchingPatternKeyError. - iseq.push(no_key_label) - iseq.putobject(NoMatchingPatternKeyError) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putobject("%p: %s") - iseq.topn(4) - iseq.topn(7) - iseq.send(YARV.calldata(:"core#sprintf", 3)) - iseq.topn(7) - iseq.topn(9) - iseq.send( - YARV.calldata(:new, 1, CallData::CALL_KWARG, %i[matchee key]) - ) - iseq.send(YARV.calldata(:"core#raise", 1)) - - iseq.push(end_leave_label) - iseq.adjuststack(7) - iseq.putnil - iseq.leave - - iseq.push(end_label) - iseq.adjuststack(6) - iseq.putnil - end - end - - def visit_rational(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_redo(node) - end - - def visit_regexp_literal(node) - if (compiled = RubyVisitor.compile(node)) - iseq.putobject(compiled) - else - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - length = visit_string_parts(node) - iseq.toregexp(flags, length) - end - end - - def visit_rescue(node) - end - - def visit_rescue_ex(node) - end - - def visit_rescue_mod(node) - end - - def visit_rest_param(node) - iseq.local_table.plain(node.name.value.to_sym) - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_size += 1 - end - - def visit_retry(node) - end - - def visit_return(node) - end - - def visit_sclass(node) - visit(node.target) - iseq.putnil - - singleton_iseq = - with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - 
iseq.defineclass( - :singletonclass, - singleton_iseq, - DefineClass::TYPE_SINGLETON_CLASS - ) - end - - def visit_statements(node) - statements = - node.body.select do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - false - else - true - end - end - - statements.empty? ? iseq.putnil : visit_all(statements) - end - - def visit_string_concat(node) - value = node.left.parts.first.value + node.right.parts.first.value - - visit_string_literal( - StringLiteral.new( - parts: [TStringContent.new(value: value, location: node.location)], - quote: node.left.quote, - location: node.location - ) - ) - end - - def visit_string_embexpr(node) - visit(node.statements) - end - - def visit_string_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_super(node) - iseq.putself - visit(node.arguments) - iseq.invokesuper( - YARV.calldata( - nil, - argument_parts(node.arguments).length, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | - CallData::CALL_SUPER - ), - nil - ) - end - - def visit_symbol_literal(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_symbols(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - node.elements.each do |element| - if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - iseq.putobject(element.parts.first.value.to_sym) - else - length = visit_string_parts(element) - iseq.concatstrings(length) - iseq.intern - end - end - - iseq.newarray(node.elements.length) - end - end - - def visit_top_const_ref(node) - iseq.opt_getconstant_path(constant_names(node)) - end - - def visit_tstring_content(node) - if options.frozen_string_literal? 
- iseq.putobject(node.accept(RubyVisitor.new)) - else - iseq.putstring(node.accept(RubyVisitor.new)) - end - end - - def visit_unary(node) - method_id = - case node.operator - when "+", "-" - "#{node.operator}@" - else - node.operator - end - - visit_call( - CommandCall.new( - receiver: node.statement, - operator: nil, - message: Ident.new(value: method_id, location: Location.default), - arguments: nil, - block: nil, - location: Location.default - ) - ) - end - - def visit_undef(node) - node.symbols.each_with_index do |symbol, index| - iseq.pop if index != 0 - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE) - visit(symbol) - iseq.send(YARV.calldata(:"core#undef_method", 2)) - end - end - - def visit_unless(node) - statements_label = iseq.label - - visit(node.predicate) - iseq.branchunless(statements_label) - node.consequent ? visit(node.consequent) : iseq.putnil - - if last_statement? - iseq.leave - iseq.push(statements_label) - visit(node.statements) - else - iseq.pop - - if node.consequent - done_label = iseq.label - iseq.jump(done_label) - iseq.push(statements_label) - visit(node.consequent) - iseq.push(done_label) - else - iseq.push(statements_label) - end - end - end - - def visit_until(node) - predicate_label = iseq.label - statements_label = iseq.label - - iseq.jump(predicate_label) - iseq.putnil - iseq.pop - iseq.jump(predicate_label) - - iseq.push(statements_label) - visit(node.statements) - iseq.pop - - iseq.push(predicate_label) - visit(node.predicate) - iseq.branchunless(statements_label) - iseq.putnil if last_statement? 
- end - - def visit_var_field(node) - case node.value - when CVar, IVar - name = node.value.value.to_sym - iseq.inline_storage_for(name) - when Ident - name = node.value.value.to_sym - - if (local_variable = iseq.local_variable(name)) - local_variable - else - iseq.local_table.plain(name) - iseq.local_variable(name) - end - end - end - - def visit_var_ref(node) - case node.value - when Const - iseq.opt_getconstant_path(constant_names(node)) - when CVar - name = node.value.value.to_sym - iseq.getclassvariable(name) - when GVar - iseq.getglobal(node.value.value.to_sym) - when Ident - lookup = iseq.local_variable(node.value.value.to_sym) - - case lookup.local - when LocalTable::BlockLocal - iseq.getblockparam(lookup.index, lookup.level) - when LocalTable::PlainLocal - iseq.getlocal(lookup.index, lookup.level) - end - when IVar - name = node.value.value.to_sym - iseq.getinstancevariable(name) - when Kw - case node.value.value - when "false" - iseq.putobject(false) - when "nil" - iseq.putnil - when "self" - iseq.putself - when "true" - iseq.putobject(true) - end - end - end - - def visit_vcall(node) - iseq.putself - iseq.send( - YARV.calldata( - node.value.value.to_sym, - 0, - CallData::CALL_FCALL | CallData::CALL_VCALL | - CallData::CALL_ARGS_SIMPLE - ) - ) - end - - def visit_when(node) - visit(node.statements) - end - - def visit_while(node) - predicate_label = iseq.label - statements_label = iseq.label - - iseq.jump(predicate_label) - iseq.putnil - iseq.pop - iseq.jump(predicate_label) - - iseq.push(statements_label) - visit(node.statements) - iseq.pop - - iseq.push(predicate_label) - visit(node.predicate) - iseq.branchif(statements_label) - iseq.putnil if last_statement? - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_words(node) - if options.frozen_string_literal? 
&& - (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - visit_all(node.elements) - iseq.newarray(node.elements.length) - end - end - - def visit_xstring_literal(node) - iseq.putself - length = visit_string_parts(node) - iseq.concatstrings(node.parts.length) if length > 1 - iseq.send( - YARV.calldata( - :`, - 1, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE - ) - ) - end - - def visit_yield(node) - parts = argument_parts(node.arguments) - visit_all(parts) - iseq.invokeblock(YARV.calldata(nil, parts.length)) - end - - def visit_zsuper(_node) - iseq.putself - iseq.invokesuper( - YARV.calldata( - nil, - 0, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | - CallData::CALL_SUPER | CallData::CALL_ZSUPER - ), - nil - ) - end - - private - - # This is a helper that is used in places where arguments may be present - # or they may be wrapped in parentheses. It's meant to descend down the - # tree and return an array of argument nodes. - def argument_parts(node) - case node - when nil - [] - when Args - node.parts - when ArgParen - if node.arguments.is_a?(ArgsForward) - [node.arguments] - else - node.arguments.parts - end - when Paren - node.contents.parts - end - end - - # Constant names when they are being assigned or referenced come in as a - # tree, but it's more convenient to work with them as an array. This - # method converts them into that array. This is nice because it's the - # operand that goes to opt_getconstant_path in Ruby 3.2. 
- def constant_names(node) - current = node - names = [] - - while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) - names.unshift(current.constant.value.to_sym) - current = current.parent - end - - case current - when VarField, VarRef - names.unshift(current.value.value.to_sym) - when TopConstRef - names.unshift(current.constant.value.to_sym) - names.unshift(:"") - end - - names - end - - # For the most part when an OpAssign (operator assignment) node with a ||= - # operator is being compiled it's a matter of reading the target, checking - # if the value should be evaluated, evaluating it if so, and then writing - # the result back to the target. - # - # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we - # first check if the value is defined using the defined instruction. I - # don't know why it is necessary, and suspect that it isn't. - def opassign_defined(node) - value_label = iseq.label - skip_value_label = iseq.label - - case node.target - when ConstPathField - visit(node.target.parent) - name = node.target.constant.value.to_sym - - iseq.dup - iseq.defined(Defined::TYPE_CONST_FROM, name, true) - when TopConstField - name = node.target.constant.value.to_sym - - iseq.putobject(Object) - iseq.dup - iseq.defined(Defined::TYPE_CONST_FROM, name, true) - when VarField - name = node.target.value.value.to_sym - iseq.putnil - - case node.target.value - when Const - iseq.defined(Defined::TYPE_CONST, name, true) - when CVar - iseq.defined(Defined::TYPE_CVAR, name, true) - when GVar - iseq.defined(Defined::TYPE_GVAR, name, true) - end - end - - iseq.branchunless(value_label) - - case node.target - when ConstPathField, TopConstField - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - when VarField - case node.target.value - when Const - iseq.opt_getconstant_path(constant_names(node.target)) - when CVar - iseq.getclassvariable(name) - when GVar - iseq.getglobal(name) - end - end - - iseq.dup - iseq.branchif(skip_value_label) - - 
iseq.pop - iseq.push(value_label) - visit(node.value) - - case node.target - when ConstPathField, TopConstField - iseq.dupn(2) - iseq.swap - iseq.setconstant(name) - when VarField - iseq.dup - - case node.target.value - when Const - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - iseq.setconstant(name) - when CVar - iseq.setclassvariable(name) - when GVar - iseq.setglobal(name) - end - end - - iseq.push(skip_value_label) - end - - # Whenever a value is interpolated into a string-like structure, these - # three instructions are pushed. - def push_interpolate - iseq.dup - iseq.objtostring( - YARV.calldata( - :to_s, - 0, - CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE - ) - ) - iseq.anytostring - end - - # Visit a type of pattern in a pattern match. - def visit_pattern(node, end_label) - case node - when AryPtn - length_label = iseq.label - match_failure_label = iseq.label - match_error_label = iseq.label - - # If there's a constant, then check if we match against that constant - # or not first. Branch to failure if we don't. - if node.constant - iseq.dup - visit(node.constant) - iseq.checkmatch(CheckMatch::TYPE_CASE) - iseq.branchunless(match_failure_label) - end - - # First, check if the #deconstruct cache is nil. If it is, we're going - # to call #deconstruct on the object and cache the result. - iseq.topn(2) - deconstruct_label = iseq.label - iseq.branchnil(deconstruct_label) - - # Next, ensure that the cached value was cached correctly, otherwise - # fail the match. - iseq.topn(2) - iseq.branchunless(match_failure_label) - - # Since we have a valid cached value, we can skip past the part where - # we call #deconstruct on the object. - iseq.pop - iseq.topn(1) - iseq.jump(length_label) - - # Check if the object responds to #deconstruct, fail the match - # otherwise. 
- iseq.event(deconstruct_label) - iseq.dup - iseq.putobject(:deconstruct) - iseq.send(YARV.calldata(:respond_to?, 1)) - iseq.setn(3) - iseq.branchunless(match_failure_label) - - # Call #deconstruct and ensure that it's an array, raise an error - # otherwise. - iseq.send(YARV.calldata(:deconstruct)) - iseq.setn(2) - iseq.dup - iseq.checktype(CheckType::TYPE_ARRAY) - iseq.branchunless(match_error_label) - - # Ensure that the deconstructed array has the correct size, fail the - # match otherwise. - iseq.push(length_label) - iseq.dup - iseq.send(YARV.calldata(:length)) - iseq.putobject(node.requireds.length) - iseq.send(YARV.calldata(:==, 1)) - iseq.branchunless(match_failure_label) - - # For each required element, check if the deconstructed array contains - # the element, otherwise jump out to the top-level match failure. - iseq.dup - node.requireds.each_with_index do |required, index| - iseq.putobject(index) - iseq.send(YARV.calldata(:[], 1)) - - case required - when VarField - lookup = visit(required) - iseq.setlocal(lookup.index, lookup.level) - else - visit(required) - iseq.checkmatch(CheckMatch::TYPE_CASE) - iseq.branchunless(match_failure_label) - end - - if index < node.requireds.length - 1 - iseq.dup - else - iseq.pop - iseq.jump(end_label) - end - end - - # Set up the routine here to raise an error to indicate that the type - # of the deconstructed array was incorrect. - iseq.push(match_error_label) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putobject(TypeError) - iseq.putobject("deconstruct must return Array") - iseq.send(YARV.calldata(:"core#raise", 2)) - iseq.pop - - # Patch all of the match failures to jump here so that we pop a final - # value before returning to the parent node. - iseq.push(match_failure_label) - iseq.pop - when VarField - lookup = visit(node) - iseq.setlocal(lookup.index, lookup.level) - iseq.jump(end_label) - end - end - - # There are a lot of nodes in the AST that act as contains of parts of - # strings. 
This includes things like string literals, regular expressions, - # heredocs, etc. This method will visit all the parts of a string within - # those containers. - def visit_string_parts(node) - length = 0 - - unless node.parts.first.is_a?(TStringContent) - iseq.putobject("") - length += 1 - end - - node.parts.each do |part| - case part - when StringDVar - visit(part.variable) - push_interpolate - when StringEmbExpr - visit(part) - push_interpolate - when TStringContent - iseq.putobject(part.accept(RubyVisitor.new)) - end - - length += 1 - end - - length - end - - # The current instruction sequence that we're compiling is always stored - # on the compiler. When we descend into a node that has its own - # instruction sequence, this method can be called to temporarily set the - # new value of the instruction sequence, yield, and then set it back. - def with_child_iseq(child_iseq) - parent_iseq = iseq - - begin - @iseq = child_iseq - yield - child_iseq - ensure - @iseq = parent_iseq - end - end - - # When we're compiling the last statement of a set of statements within a - # scope, the instructions sometimes change from pops to leaves. These - # kinds of peephole optimizations can reduce the overall number of - # instructions. Therefore, we keep track of whether we're compiling the - # last statement of a scope and allow visit methods to query that - # information. - def with_last_statement - previous = @last_statement - @last_statement = true - - begin - yield - ensure - @last_statement = previous - end - end - - def last_statement? - @last_statement - end - - # OpAssign nodes can have a number of different kinds of nodes as their - # "target" (i.e., the left-hand side of the assignment). When compiling - # these nodes we typically need to first fetch the current value of the - # variable, then perform some kind of action, then store the result back - # into the variable. 
This method handles that by first fetching the value, - # then yielding to the block, then storing the result. - def with_opassign(node) - case node.target - when ARefField - iseq.putnil - visit(node.target.collection) - visit(node.target.index) - - iseq.dupn(2) - iseq.send(YARV.calldata(:[], 1)) - - yield - - iseq.setn(3) - iseq.send(YARV.calldata(:[]=, 2)) - iseq.pop - when ConstPathField - name = node.target.constant.value.to_sym - - visit(node.target.parent) - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - - yield - - if node.operator.value == "&&=" - iseq.dupn(2) - else - iseq.swap - iseq.topn(1) - end - - iseq.swap - iseq.setconstant(name) - when TopConstField - name = node.target.constant.value.to_sym - - iseq.putobject(Object) - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - - yield - - if node.operator.value == "&&=" - iseq.dupn(2) - else - iseq.swap - iseq.topn(1) - end - - iseq.swap - iseq.setconstant(name) - when VarField - case node.target.value - when Const - names = constant_names(node.target) - iseq.opt_getconstant_path(names) - - yield - - iseq.dup - iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) - iseq.setconstant(names.last) - when CVar - name = node.target.value.value.to_sym - iseq.getclassvariable(name) - - yield - - iseq.dup - iseq.setclassvariable(name) - when GVar - name = node.target.value.value.to_sym - iseq.getglobal(name) - - yield - - iseq.dup - iseq.setglobal(name) - when Ident - local_variable = visit(node.target) - iseq.getlocal(local_variable.index, local_variable.level) - - yield - - iseq.dup - iseq.setlocal(local_variable.index, local_variable.level) - when IVar - name = node.target.value.value.to_sym - iseq.getinstancevariable(name) - - yield - - iseq.dup - iseq.setinstancevariable(name) - end - end - end - end - end -end diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb deleted file mode 100644 index a6a567fb..00000000 --- a/lib/syntax_tree/yarv/decompiler.rb 
+++ /dev/null @@ -1,254 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This class is responsible for taking a compiled instruction sequence and - # walking through it to generate equivalent Ruby code. - class Decompiler - # When we're decompiling, we use a looped case statement to emulate - # jumping around in the same way the virtual machine would. This class - # provides convenience methods for generating the AST nodes that have to - # do with that label. - class BlockLabel - include DSL - attr_reader :name - - def initialize(name) - @name = name - end - - def field - VarField(Ident(name)) - end - - def ref - VarRef(Ident(name)) - end - end - - include DSL - attr_reader :iseq, :block_label - - def initialize(iseq) - @iseq = iseq - @block_label = BlockLabel.new("__block_label") - end - - def to_ruby - Program(decompile(iseq)) - end - - private - - def node_for(value) - case value - when Integer - Int(value.to_s) - when Symbol - SymbolLiteral(Ident(value.to_s)) - end - end - - def decompile(iseq) - label = :label_0 - clauses = {} - clause = [] - - iseq.insns.each do |insn| - case insn - when InstructionSequence::Label - unless clause.last.is_a?(Next) - clause << Assign(block_label.field, node_for(insn.name)) - end - - clauses[label] = clause - clause = [] - label = insn.name - when BranchUnless - body = [ - Assign(block_label.field, node_for(insn.label.name)), - Next(Args([])) - ] - - clause << IfNode(clause.pop, Statements(body), nil) - when Dup - clause << clause.last - when DupHash - assocs = - insn.object.map do |key, value| - Assoc(node_for(key), node_for(value)) - end - - clause << HashLiteral(LBrace("{"), assocs) - when GetGlobal - clause << VarRef(GVar(insn.name.to_s)) - when GetLocalWC0 - local = iseq.local_table.locals[insn.index] - clause << VarRef(Ident(local.name.to_s)) - when Jump - clause << Assign(block_label.field, node_for(insn.label.name)) - clause << Next(Args([])) - when Leave - value = Args([clause.pop]) - clause 
<< (iseq.type == :top ? Break(value) : ReturnNode(value)) - when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, - OptMinus, OptMod, OptMult, OptOr, OptPlus - left, right = clause.pop(2) - clause << Binary(left, insn.calldata.method, right) - when OptAref - collection, arg = clause.pop(2) - clause << ARef(collection, Args([arg])) - when OptAset - collection, arg, value = clause.pop(3) - - clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && - collection === value.left.collection && - arg === value.left.index.parts[0] - OpAssign( - ARefField(collection, Args([arg])), - Op("#{value.operator}="), - value.right - ) - else - Assign(ARefField(collection, Args([arg])), value) - end - when OptNEq - left, right = clause.pop(2) - clause << Binary(left, :"!=", right) - when OptSendWithoutBlock - method = insn.calldata.method.to_s - argc = insn.calldata.argc - - if insn.calldata.flag?(CallData::CALL_FCALL) - if argc == 0 - clause.pop - clause << CallNode(nil, nil, Ident(method), Args([])) - elsif argc == 1 && method.end_with?("=") - _receiver, argument = clause.pop(2) - clause << Assign( - CallNode(nil, nil, Ident(method[0..-2]), nil), - argument - ) - else - _receiver, *arguments = clause.pop(argc + 1) - clause << CallNode( - nil, - nil, - Ident(method), - ArgParen(Args(arguments)) - ) - end - else - if argc == 0 - clause << CallNode(clause.pop, Period("."), Ident(method), nil) - elsif argc == 1 && method.end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign( - CallNode(receiver, Period("."), Ident(method[0..-2]), nil), - argument - ) - else - receiver, *arguments = clause.pop(argc + 1) - clause << CallNode( - receiver, - Period("."), - Ident(method), - ArgParen(Args(arguments)) - ) - end - end - when PutObject - case insn.object - when Float - clause << FloatLiteral(insn.object.inspect) - when Integer - clause << Int(insn.object.inspect) - else - raise "Unknown object type: #{insn.object.class.name}" - end - when PutObjectInt2Fix0 - 
clause << Int("0") - when PutObjectInt2Fix1 - clause << Int("1") - when PutSelf - clause << VarRef(Kw("self")) - when SetGlobal - target = GVar(insn.name.to_s) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - Assign(VarField(target), value) - end - when SetLocalWC0 - target = Ident(local_name(insn.index, 0)) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - Assign(VarField(target), value) - end - else - raise "Unknown instruction #{insn}" - end - end - - # If there's only one clause, then we don't need a case statement, and - # we can just disassemble the first clause. - clauses[label] = clause - return Statements(clauses.values.first) if clauses.size == 1 - - # Here we're going to build up a big case statement that will handle all - # of the different labels. - current = nil - clauses.reverse_each do |current_label, current_clause| - current = - When( - Args([node_for(current_label)]), - Statements(current_clause), - current - ) - end - switch = Case(Kw("case"), block_label.ref, current) - - # Here we're going to make sure that any locals that were established in - # the label_0 block are initialized so that scoping rules work - # correctly. - stack = [] - locals = [block_label.name] - - clauses[:label_0].each do |node| - if node.is_a?(Assign) && node.target.is_a?(VarField) && - node.target.value.is_a?(Ident) - value = node.target.value.value - next if locals.include?(value) - - stack << Assign(node.target, VarRef(Kw("nil"))) - locals << value - end - end - - # Finally, we'll set up the initial label and loop the entire case - # statement. 
- stack << Assign(block_label.field, node_for(:label_0)) - stack << MethodAddBlock( - CallNode(nil, nil, Ident("loop"), Args([])), - BlockNode( - Kw("do"), - nil, - BodyStmt(Statements([switch]), nil, nil, nil, nil) - ) - ) - Statements(stack) - end - - def local_name(index, level) - current = iseq - level.times { current = current.parent_iseq } - current.local_table.locals[index].name.to_s - end - end - end -end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb deleted file mode 100644 index 033b6d3d..00000000 --- a/lib/syntax_tree/yarv/disassembler.rb +++ /dev/null @@ -1,211 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - class Disassembler - attr_reader :output, :queue - attr_reader :current_prefix, :current_iseq - - def initialize - @output = StringIO.new - @queue = [] - - @current_prefix = "" - @current_iseq = nil - end - - ######################################################################## - # Helpers for various instructions - ######################################################################## - - def calldata(value) - flag_names = [] - flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) - if value.flag?(CallData::CALL_ARGS_BLOCKARG) - flag_names << :ARGS_BLOCKARG - end - flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) - flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) - flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) - flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) - flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) - flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) - flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) - flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) - flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if 
value.flag?(CallData::CALL_KW_SPLAT_MUT) - - parts = [] - parts << "mid:#{value.method}" if value.method - parts << "argc:#{value.argc}" - parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg - parts << flag_names.join("|") if flag_names.any? - - "" - end - - def enqueue(iseq) - queue << iseq - end - - def event(name) - case name - when :RUBY_EVENT_B_CALL - "Bc" - when :RUBY_EVENT_B_RETURN - "Br" - when :RUBY_EVENT_CALL - "Ca" - when :RUBY_EVENT_CLASS - "Cl" - when :RUBY_EVENT_END - "En" - when :RUBY_EVENT_LINE - "Li" - when :RUBY_EVENT_RETURN - "Re" - else - raise "Unknown event: #{name}" - end - end - - def inline_storage(cache) - "" - end - - def instruction(name, operands = []) - operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")] - end - - def label(value) - value.name["label_".length..] - end - - def local(index, explicit: nil, implicit: nil) - current = current_iseq - (explicit || implicit).times { current = current.parent_iseq } - - value = "#{current.local_table.name_at(index)}@#{index}" - value << ", #{explicit}" if explicit - value - end - - def object(value) - value.inspect - end - - ######################################################################## - # Main entrypoint - ######################################################################## - - def format! - while (@current_iseq = queue.shift) - output << "\n" if output.pos > 0 - format_iseq(@current_iseq) - end - - output.string - end - - private - - def format_iseq(iseq) - output << "#{current_prefix}== disasm: " - output << "#:1 " - - location = iseq.location - output << "(#{location.start_line},#{location.start_column})-" - output << "(#{location.end_line},#{location.end_column})" - output << "> " - - if iseq.catch_table.any? 
- output << "(catch: TRUE)\n" - output << "#{current_prefix}== catch table\n" - - with_prefix("#{current_prefix}| ") do - iseq.catch_table.each do |entry| - case entry - when InstructionSequence::CatchBreak - output << "#{current_prefix}catch type: break\n" - format_iseq(entry.iseq) - when InstructionSequence::CatchNext - output << "#{current_prefix}catch type: next\n" - when InstructionSequence::CatchRedo - output << "#{current_prefix}catch type: redo\n" - when InstructionSequence::CatchRescue - output << "#{current_prefix}catch type: rescue\n" - format_iseq(entry.iseq) - end - end - end - - output << "#{current_prefix}|#{"-" * 72}\n" - else - output << "(catch: FALSE)\n" - end - - if (local_table = iseq.local_table) && !local_table.empty? - output << "#{current_prefix}local table (size: #{local_table.size})\n" - - locals = - local_table.locals.each_with_index.map do |local, index| - "[%2d] %s@%d" % [local_table.offset(index), local.name, index] - end - - output << "#{current_prefix}#{locals.join(" ")}\n" - end - - length = 0 - events = [] - lines = [] - - iseq.insns.each do |insn| - case insn - when Integer - lines << insn - when Symbol - events << event(insn) - when InstructionSequence::Label - # skip - else - output << "#{current_prefix}%04d " % length - - disasm = insn.disasm(self) - output << disasm - - if lines.any? - output << " " * (65 - disasm.length) if disasm.length < 65 - elsif events.any? - output << " " * (39 - disasm.length) if disasm.length < 39 - end - - if lines.any? - output << "(%4d)" % lines.last - lines.clear - end - - if events.any? 
- output << "[#{events.join}]" - events.clear - end - - output << "\n" - length += insn.length - end - end - end - - def with_prefix(value) - previous = @current_prefix - - begin - @current_prefix = value - yield - ensure - @current_prefix = previous - end - end - end - end -end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb deleted file mode 100644 index 48305be6..00000000 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ /dev/null @@ -1,1171 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # This module provides an object representation of the YARV bytecode. - module YARV - # This class is meant to mirror RubyVM::InstructionSequence. It contains a - # list of instructions along with the metadata pertaining to them. It also - # functions as a builder for the instruction sequence. - class InstructionSequence - # When the list of instructions is first being created, it's stored as a - # linked list. This is to make it easier to perform peephole optimizations - # and other transformations like instruction specialization. - class InstructionList - class Node - attr_accessor :value, :next_node - - def initialize(value, next_node = nil) - @value = value - @next_node = next_node - end - end - - include Enumerable - attr_reader :head_node, :tail_node - - def initialize - @head_node = nil - @tail_node = nil - end - - def each - return to_enum(__method__) unless block_given? - each_node { |node| yield node.value } - end - - def each_node - return to_enum(__method__) unless block_given? - node = head_node - - while node - yield node, node.value - node = node.next_node - end - end - - def push(instruction) - node = Node.new(instruction) - - if head_node.nil? 
- @head_node = node - @tail_node = node - else - @tail_node.next_node = node - @tail_node = node - end - - node - end - end - - MAGIC = "YARVInstructionSequence/SimpleDataFormat" - - # This provides a handle to the rb_iseq_load function, which allows you to - # pass a serialized iseq to Ruby and have it return a - # RubyVM::InstructionSequence object. - ISEQ_LOAD = - begin - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - rescue NameError - end - - # This object is used to track the size of the stack at any given time. It - # is effectively a mini symbolic interpreter. It's necessary because when - # instruction sequences get serialized they include a :stack_max field on - # them. This field is used to determine how much stack space to allocate - # for the instruction sequence. - class Stack - attr_reader :current_size, :maximum_size - - def initialize - @current_size = 0 - @maximum_size = 0 - end - - def change_by(value) - @current_size += value - @maximum_size = @current_size if @current_size > @maximum_size - end - end - - # This represents the destination of instructions that jump. Initially it - # does not track its position so that when we perform optimizations the - # indices don't get messed up. - class Label - attr_reader :name - - # When we're serializing the instruction sequence, we need to be able to - # look up the label from the branch instructions and then access the - # subsequent node. So we'll store the reference here. - attr_accessor :node - - def initialize(name = nil) - @name = name - end - - def patch!(name) - @name = name - end - - def inspect - name.inspect - end - end - - # The type of the instruction sequence. - attr_reader :type - - # The name of the instruction sequence. - attr_reader :name - - # The parent instruction sequence, if there is one. - attr_reader :parent_iseq - - # The location of the root node of this instruction sequence. 
- attr_reader :location - - # This is the list of information about the arguments to this - # instruction sequence. - attr_accessor :argument_size - attr_reader :argument_options - - # The catch table for this instruction sequence. - attr_reader :catch_table - - # The list of instructions for this instruction sequence. - attr_reader :insns - - # The table of local variables. - attr_reader :local_table - - # The hash of names of instance and class variables pointing to the - # index of their associated inline storage. - attr_reader :inline_storages - - # The index of the next inline storage that will be created. - attr_reader :storage_index - - # An object that will track the current size of the stack and the - # maximum size of the stack for this instruction sequence. - attr_reader :stack - - # These are various compilation options provided. - attr_reader :options - - def initialize( - type, - name, - parent_iseq, - location, - options = Compiler::Options.new - ) - @type = type - @name = name - @parent_iseq = parent_iseq - @location = location - - @argument_size = 0 - @argument_options = {} - @catch_table = [] - - @local_table = LocalTable.new - @inline_storages = {} - @insns = InstructionList.new - @storage_index = 0 - @stack = Stack.new - - @options = options - end - - ########################################################################## - # Query methods - ########################################################################## - - def local_variable(name, level = 0) - if (lookup = local_table.find(name, level)) - lookup - elsif parent_iseq - parent_iseq.local_variable(name, level + 1) - end - end - - def inline_storage - storage = storage_index - @storage_index += 1 - storage - end - - def inline_storage_for(name) - inline_storages[name] = inline_storage unless inline_storages.key?(name) - - inline_storages[name] - end - - def length - insns - .each - .inject(0) do |sum, insn| - case insn - when Integer, Label, Symbol - sum - else - sum + insn.length - 
end - end - end - - def eval - raise "Unsupported platform" if ISEQ_LOAD.nil? - Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval - end - - def to_a - versions = RUBY_VERSION.split(".").map(&:to_i) - - # Dump all of the instructions into a flat list. - dumped = - insns.map do |insn| - case insn - when Integer, Symbol - insn - when Label - insn.name - else - insn.to_a(self) - end - end - - dumped_options = argument_options.dup - dumped_options[:opt].map!(&:name) if dumped_options[:opt] - - # Next, return the instruction sequence as an array. - [ - MAGIC, - versions[0], - versions[1], - 1, - { - arg_size: argument_size, - local_size: local_table.size, - stack_max: stack.maximum_size, - node_id: -1, - node_ids: [-1] * insns.length - }, - name, - "", - "", - location.start_line, - type, - local_table.names, - dumped_options, - catch_table.map(&:to_a), - dumped - ] - end - - def disasm - disassembler = Disassembler.new - disassembler.enqueue(self) - disassembler.format! - end - - # This method converts our linked list of instructions into a final array - # and performs any other compilation steps necessary. - def compile! - specialize_instructions! if options.specialized_instruction? - - length = 0 - insns.each do |insn| - case insn - when Integer, Symbol - # skip - when Label - insn.patch!(:"label_#{length}") - when DefineClass - insn.class_iseq.compile! - length += insn.length - when DefineMethod, DefineSMethod - insn.method_iseq.compile! - length += insn.length - when InvokeSuper, Send - insn.block_iseq.compile! if insn.block_iseq - length += insn.length - when Once - insn.iseq.compile! - length += insn.length - else - length += insn.length - end - end - - @insns = insns.to_a - end - - def specialize_instructions! 
- insns.each_node do |node, value| - case value - when NewArray - next unless node.next_node - - next_node = node.next_node - next unless next_node.value.is_a?(Send) - next if next_node.value.block_iseq - - calldata = next_node.value.calldata - next unless calldata.flags == CallData::CALL_ARGS_SIMPLE - next unless calldata.argc == 0 - - case calldata.method - when :max - node.value = OptNewArrayMax.new(value.number) - node.next_node = next_node.next_node - when :min - node.value = OptNewArrayMin.new(value.number) - node.next_node = next_node.next_node - end - when PutObject, PutString - next unless node.next_node - next if value.is_a?(PutObject) && !value.object.is_a?(String) - - next_node = node.next_node - next unless next_node.value.is_a?(Send) - next if next_node.value.block_iseq - - calldata = next_node.value.calldata - next unless calldata.flags == CallData::CALL_ARGS_SIMPLE - next unless calldata.argc == 0 - - case calldata.method - when :freeze - node.value = OptStrFreeze.new(value.object, calldata) - node.next_node = next_node.next_node - when :-@ - node.value = OptStrUMinus.new(value.object, calldata) - node.next_node = next_node.next_node - end - when Send - calldata = value.calldata - - if !value.block_iseq && - !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and - # the number of arguments. 
- node.value = - case [calldata.method, calldata.argc] - when [:length, 0] - OptLength.new(calldata) - when [:size, 0] - OptSize.new(calldata) - when [:empty?, 0] - OptEmptyP.new(calldata) - when [:nil?, 0] - OptNilP.new(calldata) - when [:succ, 0] - OptSucc.new(calldata) - when [:!, 0] - OptNot.new(calldata) - when [:+, 1] - OptPlus.new(calldata) - when [:-, 1] - OptMinus.new(calldata) - when [:*, 1] - OptMult.new(calldata) - when [:/, 1] - OptDiv.new(calldata) - when [:%, 1] - OptMod.new(calldata) - when [:==, 1] - OptEq.new(calldata) - when [:!=, 1] - OptNEq.new(YARV.calldata(:==, 1), calldata) - when [:=~, 1] - OptRegExpMatch2.new(calldata) - when [:<, 1] - OptLT.new(calldata) - when [:<=, 1] - OptLE.new(calldata) - when [:>, 1] - OptGT.new(calldata) - when [:>=, 1] - OptGE.new(calldata) - when [:<<, 1] - OptLTLT.new(calldata) - when [:[], 1] - OptAref.new(calldata) - when [:&, 1] - OptAnd.new(calldata) - when [:|, 1] - OptOr.new(calldata) - when [:[]=, 2] - OptAset.new(calldata) - else - OptSendWithoutBlock.new(calldata) - end - end - end - end - end - - ########################################################################## - # Child instruction sequence methods - ########################################################################## - - def child_iseq(type, name, location) - InstructionSequence.new(type, name, self, location, options) - end - - def block_child_iseq(location) - current = self - current = current.parent_iseq while current.type == :block - child_iseq(:block, "block in #{current.name}", location) - end - - def class_child_iseq(name, location) - child_iseq(:class, "", location) - end - - def method_child_iseq(name, location) - child_iseq(:method, name, location) - end - - def module_child_iseq(name, location) - child_iseq(:class, "", location) - end - - def singleton_class_child_iseq(location) - child_iseq(:class, "singleton class", location) - end - - ########################################################################## - # Catch 
table methods - ########################################################################## - - class CatchEntry - attr_reader :iseq, :begin_label, :end_label, :exit_label - - def initialize(iseq, begin_label, end_label, exit_label) - @iseq = iseq - @begin_label = begin_label - @end_label = end_label - @exit_label = exit_label - end - end - - class CatchBreak < CatchEntry - def to_a - [:break, iseq.to_a, begin_label.name, end_label.name, exit_label.name] - end - end - - class CatchNext < CatchEntry - def to_a - [:next, nil, begin_label.name, end_label.name, exit_label.name] - end - end - - class CatchRedo < CatchEntry - def to_a - [:redo, nil, begin_label.name, end_label.name, exit_label.name] - end - end - - class CatchRescue < CatchEntry - def to_a - [ - :rescue, - iseq.to_a, - begin_label.name, - end_label.name, - exit_label.name - ] - end - end - - class CatchRetry < CatchEntry - def to_a - [:retry, nil, begin_label.name, end_label.name, exit_label.name] - end - end - - def catch_break(iseq, begin_label, end_label, exit_label) - catch_table << CatchBreak.new(iseq, begin_label, end_label, exit_label) - end - - def catch_next(begin_label, end_label, exit_label) - catch_table << CatchNext.new(nil, begin_label, end_label, exit_label) - end - - def catch_redo(begin_label, end_label, exit_label) - catch_table << CatchRedo.new(nil, begin_label, end_label, exit_label) - end - - def catch_rescue(iseq, begin_label, end_label, exit_label) - catch_table << CatchRescue.new(iseq, begin_label, end_label, exit_label) - end - - def catch_retry(begin_label, end_label, exit_label) - catch_table << CatchRetry.new(nil, begin_label, end_label, exit_label) - end - - ########################################################################## - # Instruction push methods - ########################################################################## - - def label - Label.new - end - - def push(value) - node = insns.push(value) - - case value - when Array, Integer, Symbol - value - when 
Label - value.node = node - value - else - stack.change_by(-value.pops + value.pushes) - value - end - end - - def event(name) - push(name) - end - - def adjuststack(number) - push(AdjustStack.new(number)) - end - - def anytostring - push(AnyToString.new) - end - - def branchif(label) - push(BranchIf.new(label)) - end - - def branchnil(label) - push(BranchNil.new(label)) - end - - def branchunless(label) - push(BranchUnless.new(label)) - end - - def checkkeyword(keyword_bits_index, keyword_index) - push(CheckKeyword.new(keyword_bits_index, keyword_index)) - end - - def checkmatch(type) - push(CheckMatch.new(type)) - end - - def checktype(type) - push(CheckType.new(type)) - end - - def concatarray - push(ConcatArray.new) - end - - def concatstrings(number) - push(ConcatStrings.new(number)) - end - - def defined(type, name, message) - push(Defined.new(type, name, message)) - end - - def defineclass(name, class_iseq, flags) - push(DefineClass.new(name, class_iseq, flags)) - end - - def definemethod(name, method_iseq) - push(DefineMethod.new(name, method_iseq)) - end - - def definesmethod(name, method_iseq) - push(DefineSMethod.new(name, method_iseq)) - end - - def dup - push(Dup.new) - end - - def duparray(object) - push(DupArray.new(object)) - end - - def duphash(object) - push(DupHash.new(object)) - end - - def dupn(number) - push(DupN.new(number)) - end - - def expandarray(length, flags) - push(ExpandArray.new(length, flags)) - end - - def getblockparam(index, level) - push(GetBlockParam.new(index, level)) - end - - def getblockparamproxy(index, level) - push(GetBlockParamProxy.new(index, level)) - end - - def getclassvariable(name) - if RUBY_VERSION < "3.0" - push(Legacy::GetClassVariable.new(name)) - else - push(GetClassVariable.new(name, inline_storage_for(name))) - end - end - - def getconstant(name) - push(GetConstant.new(name)) - end - - def getglobal(name) - push(GetGlobal.new(name)) - end - - def getinstancevariable(name) - if RUBY_VERSION < "3.2" - 
push(GetInstanceVariable.new(name, inline_storage_for(name))) - else - push(GetInstanceVariable.new(name, inline_storage)) - end - end - - def getlocal(index, level) - if options.operands_unification? - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. - case level - when 0 - push(GetLocalWC0.new(index)) - when 1 - push(GetLocalWC1.new(index)) - else - push(GetLocal.new(index, level)) - end - else - push(GetLocal.new(index, level)) - end - end - - def getspecial(key, type) - push(GetSpecial.new(key, type)) - end - - def intern - push(Intern.new) - end - - def invokeblock(calldata) - push(InvokeBlock.new(calldata)) - end - - def invokesuper(calldata, block_iseq) - push(InvokeSuper.new(calldata, block_iseq)) - end - - def jump(label) - push(Jump.new(label)) - end - - def leave - push(Leave.new) - end - - def newarray(number) - push(NewArray.new(number)) - end - - def newarraykwsplat(number) - push(NewArrayKwSplat.new(number)) - end - - def newhash(number) - push(NewHash.new(number)) - end - - def newrange(exclude_end) - push(NewRange.new(exclude_end)) - end - - def nop - push(Nop.new) - end - - def objtostring(calldata) - push(ObjToString.new(calldata)) - end - - def once(iseq, cache) - push(Once.new(iseq, cache)) - end - - def opt_aref_with(object, calldata) - push(OptArefWith.new(object, calldata)) - end - - def opt_aset_with(object, calldata) - push(OptAsetWith.new(object, calldata)) - end - - def opt_case_dispatch(case_dispatch_hash, else_label) - push(OptCaseDispatch.new(case_dispatch_hash, else_label)) - end - - def opt_getconstant_path(names) - if RUBY_VERSION < "3.2" || !options.inline_const_cache? - cache = nil - cache_filled_label = nil - - if options.inline_const_cache? 
- cache = inline_storage - cache_filled_label = label - opt_getinlinecache(cache_filled_label, cache) - - if names[0] == :"" - names.shift - pop - putobject(Object) - end - elsif names[0] == :"" - names.shift - putobject(Object) - else - putnil - end - - names.each_with_index do |name, index| - putobject(index == 0) - getconstant(name) - end - - if options.inline_const_cache? - opt_setinlinecache(cache) - push(cache_filled_label) - end - else - push(OptGetConstantPath.new(names)) - end - end - - def opt_getinlinecache(label, cache) - push(Legacy::OptGetInlineCache.new(label, cache)) - end - - def opt_setinlinecache(cache) - push(Legacy::OptSetInlineCache.new(cache)) - end - - def pop - push(Pop.new) - end - - def putnil - push(PutNil.new) - end - - def putobject(object) - if options.operands_unification? - # Specialize the putobject instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. 
- if object.eql?(0) - push(PutObjectInt2Fix0.new) - elsif object.eql?(1) - push(PutObjectInt2Fix1.new) - else - push(PutObject.new(object)) - end - else - push(PutObject.new(object)) - end - end - - def putself - push(PutSelf.new) - end - - def putspecialobject(object) - push(PutSpecialObject.new(object)) - end - - def putstring(object) - push(PutString.new(object)) - end - - def send(calldata, block_iseq = nil) - push(Send.new(calldata, block_iseq)) - end - - def setblockparam(index, level) - push(SetBlockParam.new(index, level)) - end - - def setclassvariable(name) - if RUBY_VERSION < "3.0" - push(Legacy::SetClassVariable.new(name)) - else - push(SetClassVariable.new(name, inline_storage_for(name))) - end - end - - def setconstant(name) - push(SetConstant.new(name)) - end - - def setglobal(name) - push(SetGlobal.new(name)) - end - - def setinstancevariable(name) - if RUBY_VERSION < "3.2" - push(SetInstanceVariable.new(name, inline_storage_for(name))) - else - push(SetInstanceVariable.new(name, inline_storage)) - end - end - - def setlocal(index, level) - if options.operands_unification? - # Specialize the setlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - push(SetLocalWC0.new(index)) - when 1 - push(SetLocalWC1.new(index)) - else - push(SetLocal.new(index, level)) - end - else - push(SetLocal.new(index, level)) - end - end - - def setn(number) - push(SetN.new(number)) - end - - def setspecial(key) - push(SetSpecial.new(key)) - end - - def splatarray(flag) - push(SplatArray.new(flag)) - end - - def swap - push(Swap.new) - end - - def throw(type) - push(Throw.new(type)) - end - - def topn(number) - push(TopN.new(number)) - end - - def toregexp(options, length) - push(ToRegExp.new(options, length)) - end - - # This method will create a new instruction sequence from a serialized - # RubyVM::InstructionSequence object. - def self.from(source, options = Compiler::Options.new, parent_iseq = nil) - iseq = new(source[9], source[5], parent_iseq, Location.default, options) - - # set up the labels object so that the labels are shared between the - # location in the instruction sequence and the instructions that - # reference them - labels = Hash.new { |hash, name| hash[name] = Label.new(name) } - - # set up the correct argument size - iseq.argument_size = source[4][:arg_size] - - # set up all of the locals - source[10].each { |local| iseq.local_table.plain(local) } - - # set up the argument options - iseq.argument_options.merge!(source[11]) - if iseq.argument_options[:opt] - iseq.argument_options[:opt].map! 
{ |opt| labels[opt] } - end - - # set up the catch table - source[12].each do |entry| - case entry[0] - when :break - iseq.catch_break( - from(entry[1]), - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :next - iseq.catch_next( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :rescue - iseq.catch_rescue( - from(entry[1]), - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :redo - iseq.catch_redo( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :retry - iseq.catch_retry( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - else - raise "unknown catch type: #{entry[0]}" - end - end - - # set up all of the instructions - source[13].each do |insn| - # add line numbers - if insn.is_a?(Integer) - iseq.push(insn) - next - end - - # add events and labels - if insn.is_a?(Symbol) - if insn.start_with?("label_") - iseq.push(labels[insn]) - else - iseq.push(insn) - end - next - end - - # add instructions, mapped to our own instruction classes - type, *opnds = insn - - case type - when :adjuststack - iseq.adjuststack(opnds[0]) - when :anytostring - iseq.anytostring - when :branchif - iseq.branchif(labels[opnds[0]]) - when :branchnil - iseq.branchnil(labels[opnds[0]]) - when :branchunless - iseq.branchunless(labels[opnds[0]]) - when :checkkeyword - iseq.checkkeyword(iseq.local_table.size - opnds[0] + 2, opnds[1]) - when :checkmatch - iseq.checkmatch(opnds[0]) - when :checktype - iseq.checktype(opnds[0]) - when :concatarray - iseq.concatarray - when :concatstrings - iseq.concatstrings(opnds[0]) - when :defineclass - iseq.defineclass(opnds[0], from(opnds[1], options, iseq), opnds[2]) - when :defined - iseq.defined(opnds[0], opnds[1], opnds[2]) - when :definemethod - iseq.definemethod(opnds[0], from(opnds[1], options, iseq)) - when :definesmethod - iseq.definesmethod(opnds[0], from(opnds[1], options, iseq)) - when :dup - iseq.dup - when :duparray - iseq.duparray(opnds[0]) - when :duphash 
- iseq.duphash(opnds[0]) - when :dupn - iseq.dupn(opnds[0]) - when :expandarray - iseq.expandarray(opnds[0], opnds[1]) - when :getblockparam, :getblockparamproxy, :getlocal, :getlocal_WC_0, - :getlocal_WC_1, :setblockparam, :setlocal, :setlocal_WC_0, - :setlocal_WC_1 - current = iseq - level = 0 - - case type - when :getlocal_WC_1, :setlocal_WC_1 - level = 1 - when :getblockparam, :getblockparamproxy, :getlocal, :setblockparam, - :setlocal - level = opnds[1] - end - - level.times { current = current.parent_iseq } - index = current.local_table.size - opnds[0] + 2 - - case type - when :getblockparam - iseq.getblockparam(index, level) - when :getblockparamproxy - iseq.getblockparamproxy(index, level) - when :getlocal, :getlocal_WC_0, :getlocal_WC_1 - iseq.getlocal(index, level) - when :setblockparam - iseq.setblockparam(index, level) - when :setlocal, :setlocal_WC_0, :setlocal_WC_1 - iseq.setlocal(index, level) - end - when :getclassvariable - iseq.push(GetClassVariable.new(opnds[0], opnds[1])) - when :getconstant - iseq.getconstant(opnds[0]) - when :getglobal - iseq.getglobal(opnds[0]) - when :getinstancevariable - iseq.push(GetInstanceVariable.new(opnds[0], opnds[1])) - when :getspecial - iseq.getspecial(opnds[0], opnds[1]) - when :intern - iseq.intern - when :invokeblock - iseq.invokeblock(CallData.from(opnds[0])) - when :invokesuper - block_iseq = opnds[1] ? 
from(opnds[1], options, iseq) : nil - iseq.invokesuper(CallData.from(opnds[0]), block_iseq) - when :jump - iseq.jump(labels[opnds[0]]) - when :leave - iseq.leave - when :newarray - iseq.newarray(opnds[0]) - when :newarraykwsplat - iseq.newarraykwsplat(opnds[0]) - when :newhash - iseq.newhash(opnds[0]) - when :newrange - iseq.newrange(opnds[0]) - when :nop - iseq.nop - when :objtostring - iseq.objtostring(CallData.from(opnds[0])) - when :once - iseq.once(from(opnds[0], options, iseq), opnds[1]) - when :opt_and, :opt_aref, :opt_aset, :opt_div, :opt_empty_p, :opt_eq, - :opt_ge, :opt_gt, :opt_le, :opt_length, :opt_lt, :opt_ltlt, - :opt_minus, :opt_mod, :opt_mult, :opt_nil_p, :opt_not, :opt_or, - :opt_plus, :opt_regexpmatch2, :opt_send_without_block, :opt_size, - :opt_succ - iseq.send(CallData.from(opnds[0]), nil) - when :opt_aref_with - iseq.opt_aref_with(opnds[0], CallData.from(opnds[1])) - when :opt_aset_with - iseq.opt_aset_with(opnds[0], CallData.from(opnds[1])) - when :opt_case_dispatch - hash = - opnds[0] - .each_slice(2) - .to_h - .transform_values { |value| labels[value] } - iseq.opt_case_dispatch(hash, labels[opnds[1]]) - when :opt_getconstant_path - iseq.opt_getconstant_path(opnds[0]) - when :opt_getinlinecache - iseq.opt_getinlinecache(labels[opnds[0]], opnds[1]) - when :opt_newarray_max - iseq.newarray(opnds[0]) - iseq.send(YARV.calldata(:max)) - when :opt_newarray_min - iseq.newarray(opnds[0]) - iseq.send(YARV.calldata(:min)) - when :opt_neq - iseq.push( - OptNEq.new(CallData.from(opnds[0]), CallData.from(opnds[1])) - ) - when :opt_setinlinecache - iseq.opt_setinlinecache(opnds[0]) - when :opt_str_freeze - iseq.putstring(opnds[0]) - iseq.send(YARV.calldata(:freeze)) - when :opt_str_uminus - iseq.putstring(opnds[0]) - iseq.send(YARV.calldata(:-@)) - when :pop - iseq.pop - when :putnil - iseq.putnil - when :putobject - iseq.putobject(opnds[0]) - when :putobject_INT2FIX_0_ - iseq.putobject(0) - when :putobject_INT2FIX_1_ - iseq.putobject(1) - when :putself - 
iseq.putself - when :putstring - iseq.putstring(opnds[0]) - when :putspecialobject - iseq.putspecialobject(opnds[0]) - when :send - block_iseq = opnds[1] ? from(opnds[1], options, iseq) : nil - iseq.send(CallData.from(opnds[0]), block_iseq) - when :setclassvariable - iseq.push(SetClassVariable.new(opnds[0], opnds[1])) - when :setconstant - iseq.setconstant(opnds[0]) - when :setglobal - iseq.setglobal(opnds[0]) - when :setinstancevariable - iseq.push(SetInstanceVariable.new(opnds[0], opnds[1])) - when :setn - iseq.setn(opnds[0]) - when :setspecial - iseq.setspecial(opnds[0]) - when :splatarray - iseq.splatarray(opnds[0]) - when :swap - iseq.swap - when :throw - iseq.throw(opnds[0]) - when :topn - iseq.topn(opnds[0]) - when :toregexp - iseq.toregexp(opnds[0], opnds[1]) - else - raise "Unknown instruction type: #{type}" - end - end - - iseq.compile! if iseq.type == :top - iseq - end - end - end -end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb deleted file mode 100644 index 288edb16..00000000 --- a/lib/syntax_tree/yarv/instructions.rb +++ /dev/null @@ -1,5203 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This is an operand to various YARV instructions that represents the - # information about a specific call site. 
- class CallData - CALL_ARGS_SPLAT = 1 << 0 - CALL_ARGS_BLOCKARG = 1 << 1 - CALL_FCALL = 1 << 2 - CALL_VCALL = 1 << 3 - CALL_ARGS_SIMPLE = 1 << 4 - CALL_BLOCKISEQ = 1 << 5 - CALL_KWARG = 1 << 6 - CALL_KW_SPLAT = 1 << 7 - CALL_TAILCALL = 1 << 8 - CALL_SUPER = 1 << 9 - CALL_ZSUPER = 1 << 10 - CALL_OPT_SEND = 1 << 11 - CALL_KW_SPLAT_MUT = 1 << 12 - - attr_reader :method, :argc, :flags, :kw_arg - - def initialize( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - @method = method - @argc = argc - @flags = flags - @kw_arg = kw_arg - end - - def flag?(mask) - (flags & mask) > 0 - end - - def to_h - result = { mid: method, flag: flags, orig_argc: argc } - result[:kw_arg] = kw_arg if kw_arg - result - end - - def self.from(serialized) - new( - serialized[:mid], - serialized[:orig_argc], - serialized[:flag], - serialized[:kw_arg] - ) - end - end - - # A convenience method for creating a CallData object. - def self.calldata( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - CallData.new(method, argc, flags, kw_arg) - end - - # ### Summary - # - # `adjuststack` accepts a single integer argument and removes that many - # elements from the top of the stack. - # - # ### Usage - # - # ~~~ruby - # x = [true] - # x[0] ||= nil - # x[0] - # ~~~ - # - class AdjustStack - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("adjuststack", [fmt.object(number)]) - end - - def to_a(_iseq) - [:adjuststack, number] - end - - def length - 2 - end - - def pops - number - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - vm.pop(number) - end - end - - # ### Summary - # - # `anytostring` ensures that the value on top of the stack is a string. - # - # It pops two values off the stack. If the first value is a string it - # pushes it back on the stack. 
If the first value is not a string, it uses - # Ruby's built in string coercion to coerce the second value to a string - # and then pushes that back on the stack. - # - # This is used in conjunction with `objtostring` as a fallback for when an - # object's `to_s` method does not return a string. - # - # ### Usage - # - # ~~~ruby - # "#{5}" - # ~~~ - # - class AnyToString - def disasm(fmt) - fmt.instruction("anytostring") - end - - def to_a(_iseq) - [:anytostring] - end - - def length - 1 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - original, value = vm.pop(2) - - if value.is_a?(String) - vm.push(value) - else - vm.push("#<#{original.class.name}:0000>") - end - end - end - - # ### Summary - # - # `branchif` has one argument: the jump index. It pops one value off the - # stack: the jump condition. - # - # If the value popped off the stack is true, `branchif` jumps to - # the jump index and continues executing there. - # - # ### Usage - # - # ~~~ruby - # x = true - # x ||= "foo" - # puts x - # ~~~ - # - class BranchIf - attr_reader :label - - def initialize(label) - @label = label - end - - def disasm(fmt) - fmt.instruction("branchif", [fmt.label(label)]) - end - - def to_a(_iseq) - [:branchif, label.name] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - vm.jump(label) if vm.pop - end - end - - # ### Summary - # - # `branchnil` has one argument: the jump index. It pops one value off the - # stack: the jump condition. - # - # If the value popped off the stack is nil, `branchnil` jumps to - # the jump index and continues executing there. 
- # - # ### Usage - # - # ~~~ruby - # x = nil - # if x&.to_s - # puts "hi" - # end - # ~~~ - # - class BranchNil - attr_reader :label - - def initialize(label) - @label = label - end - - def disasm(fmt) - fmt.instruction("branchnil", [fmt.label(label)]) - end - - def to_a(_iseq) - [:branchnil, label.name] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - vm.jump(label) if vm.pop.nil? - end - end - - # ### Summary - # - # `branchunless` has one argument: the jump index. It pops one value off - # the stack: the jump condition. - # - # If the value popped off the stack is false or nil, `branchunless` jumps - # to the jump index and continues executing there. - # - # ### Usage - # - # ~~~ruby - # if 2 + 3 - # puts "foo" - # end - # ~~~ - # - class BranchUnless - attr_reader :label - - def initialize(label) - @label = label - end - - def disasm(fmt) - fmt.instruction("branchunless", [fmt.label(label)]) - end - - def to_a(_iseq) - [:branchunless, label.name] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - vm.jump(label) unless vm.pop - end - end - - # ### Summary - # - # `checkkeyword` checks if a keyword was passed at the callsite that - # called into the method represented by the instruction sequence. It has - # two arguments: the index of the local variable that stores the keywords - # metadata and the index of the keyword within that metadata. It pushes - # a boolean onto the stack indicating whether or not the keyword was - # given. 
- # - # ### Usage - # - # ~~~ruby - # def evaluate(value: rand) - # value - # end - # - # evaluate(value: 3) - # ~~~ - # - class CheckKeyword - attr_reader :keyword_bits_index, :keyword_index - - def initialize(keyword_bits_index, keyword_index) - @keyword_bits_index = keyword_bits_index - @keyword_index = keyword_index - end - - def disasm(fmt) - fmt.instruction( - "checkkeyword", - [fmt.object(keyword_bits_index), fmt.object(keyword_index)] - ) - end - - def to_a(iseq) - [ - :checkkeyword, - iseq.local_table.offset(keyword_bits_index), - keyword_index - ] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.local_get(keyword_bits_index, 0)[keyword_index]) - end - end - - # ### Summary - # - # `checkmatch` checks if the current pattern matches the current value. It - # pops the target and the pattern off the stack and pushes a boolean onto - # the stack if it matches or not. - # - # ### Usage - # - # ~~~ruby - # foo in Foo - # ~~~ - # - class CheckMatch - TYPE_WHEN = 1 - TYPE_CASE = 2 - TYPE_RESCUE = 3 - - attr_reader :type - - def initialize(type) - @type = type - end - - def disasm(fmt) - fmt.instruction("checkmatch", [fmt.object(type)]) - end - - def to_a(_iseq) - [:checkmatch, type] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - raise NotImplementedError, "checkmatch" - end - end - - # ### Summary - # - # `checktype` checks if the value on top of the stack is of a certain type. - # The type is the only argument. It pops the value off the stack and pushes - # a boolean onto the stack indicating whether or not the value is of the - # given type. 
- # - # ### Usage - # - # ~~~ruby - # foo in [bar] - # ~~~ - # - class CheckType - TYPE_OBJECT = 0x01 - TYPE_CLASS = 0x02 - TYPE_MODULE = 0x03 - TYPE_FLOAT = 0x04 - TYPE_STRING = 0x05 - TYPE_REGEXP = 0x06 - TYPE_ARRAY = 0x07 - TYPE_HASH = 0x08 - TYPE_STRUCT = 0x09 - TYPE_BIGNUM = 0x0a - TYPE_FILE = 0x0b - TYPE_DATA = 0x0c - TYPE_MATCH = 0x0d - TYPE_COMPLEX = 0x0e - TYPE_RATIONAL = 0x0f - TYPE_NIL = 0x11 - TYPE_TRUE = 0x12 - TYPE_FALSE = 0x13 - TYPE_SYMBOL = 0x14 - TYPE_FIXNUM = 0x15 - TYPE_UNDEF = 0x16 - - attr_reader :type - - def initialize(type) - @type = type - end - - def disasm(fmt) - name = - case type - when TYPE_OBJECT - "T_OBJECT" - when TYPE_CLASS - "T_CLASS" - when TYPE_MODULE - "T_MODULE" - when TYPE_FLOAT - "T_FLOAT" - when TYPE_STRING - "T_STRING" - when TYPE_REGEXP - "T_REGEXP" - when TYPE_ARRAY - "T_ARRAY" - when TYPE_HASH - "T_HASH" - when TYPE_STRUCT - "T_STRUCT" - when TYPE_BIGNUM - "T_BIGNUM" - when TYPE_FILE - "T_FILE" - when TYPE_DATA - "T_DATA" - when TYPE_MATCH - "T_MATCH" - when TYPE_COMPLEX - "T_COMPLEX" - when TYPE_RATIONAL - "T_RATIONAL" - when TYPE_NIL - "T_NIL" - when TYPE_TRUE - "T_TRUE" - when TYPE_FALSE - "T_FALSE" - when TYPE_SYMBOL - "T_SYMBOL" - when TYPE_FIXNUM - "T_FIXNUM" - when TYPE_UNDEF - "T_UNDEF" - end - - fmt.instruction("checktype", [name]) - end - - def to_a(_iseq) - [:checktype, type] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - # TODO: This is incorrect. The instruction only pushes a single value - # onto the stack. However, if this is set to 1, we no longer match the - # output of RubyVM::InstructionSequence. So leaving this here until we - # can investigate further. 
- 2 - end - - def canonical - self - end - - def call(vm) - object = vm.pop - result = - case type - when TYPE_OBJECT - raise NotImplementedError, "checktype TYPE_OBJECT" - when TYPE_CLASS - object.is_a?(Class) - when TYPE_MODULE - object.is_a?(Module) - when TYPE_FLOAT - object.is_a?(Float) - when TYPE_STRING - object.is_a?(String) - when TYPE_REGEXP - object.is_a?(Regexp) - when TYPE_ARRAY - object.is_a?(Array) - when TYPE_HASH - object.is_a?(Hash) - when TYPE_STRUCT - object.is_a?(Struct) - when TYPE_BIGNUM - raise NotImplementedError, "checktype TYPE_BIGNUM" - when TYPE_FILE - object.is_a?(File) - when TYPE_DATA - raise NotImplementedError, "checktype TYPE_DATA" - when TYPE_MATCH - raise NotImplementedError, "checktype TYPE_MATCH" - when TYPE_COMPLEX - object.is_a?(Complex) - when TYPE_RATIONAL - object.is_a?(Rational) - when TYPE_NIL - object.nil? - when TYPE_TRUE - object == true - when TYPE_FALSE - object == false - when TYPE_SYMBOL - object.is_a?(Symbol) - when TYPE_FIXNUM - object.is_a?(Integer) - when TYPE_UNDEF - raise NotImplementedError, "checktype TYPE_UNDEF" - end - - vm.push(result) - end - end - - # ### Summary - # - # `concatarray` concatenates the two Arrays on top of the stack. - # - # It coerces the two objects at the top of the stack into Arrays by - # calling `to_a` if necessary, and makes sure to `dup` the first Array if - # it was already an Array, to avoid mutating it when concatenating. - # - # ### Usage - # - # ~~~ruby - # [1, *2] - # ~~~ - # - class ConcatArray - def disasm(fmt) - fmt.instruction("concatarray") - end - - def to_a(_iseq) - [:concatarray] - end - - def length - 1 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - left, right = vm.pop(2) - vm.push([*left, *right]) - end - end - - # ### Summary - # - # `concatstrings` pops a number of strings from the stack joins them - # together into a single string and pushes that string back on the stack. 
- # - # This does no coercion and so is always used in conjunction with - # `objtostring` and `anytostring` to ensure the stack contents are always - # strings. - # - # ### Usage - # - # ~~~ruby - # "#{5}" - # ~~~ - # - class ConcatStrings - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("concatstrings", [fmt.object(number)]) - end - - def to_a(_iseq) - [:concatstrings, number] - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop(number).join) - end - end - - # ### Summary - # - # `defineclass` defines a class. First it pops the superclass off the - # stack, then it pops the object off the stack that the class should be - # defined under. It has three arguments: the name of the constant, the - # instruction sequence associated with the class, and various flags that - # indicate if it is a singleton class, a module, or a regular class. - # - # ### Usage - # - # ~~~ruby - # class Foo - # end - # ~~~ - # - class DefineClass - TYPE_CLASS = 0 - TYPE_SINGLETON_CLASS = 1 - TYPE_MODULE = 2 - FLAG_SCOPED = 8 - FLAG_HAS_SUPERCLASS = 16 - - attr_reader :name, :class_iseq, :flags - - def initialize(name, class_iseq, flags) - @name = name - @class_iseq = class_iseq - @flags = flags - end - - def disasm(fmt) - fmt.enqueue(class_iseq) - fmt.instruction( - "defineclass", - [fmt.object(name), class_iseq.name, fmt.object(flags)] - ) - end - - def to_a(_iseq) - [:defineclass, name, class_iseq.to_a, flags] - end - - def length - 4 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - object, superclass = vm.pop(2) - iseq = class_iseq - - clazz = Class.new(superclass || Object) - vm.push(vm.run_class_frame(iseq, clazz)) - - object.const_set(name, clazz) - end - end - - # ### Summary - # - # `defined` checks if the top value of the stack is defined. 
If it is, it - # pushes its value onto the stack. Otherwise it pushes `nil`. - # - # ### Usage - # - # ~~~ruby - # defined?(x) - # ~~~ - # - class Defined - TYPE_NIL = 1 - TYPE_IVAR = 2 - TYPE_LVAR = 3 - TYPE_GVAR = 4 - TYPE_CVAR = 5 - TYPE_CONST = 6 - TYPE_METHOD = 7 - TYPE_YIELD = 8 - TYPE_ZSUPER = 9 - TYPE_SELF = 10 - TYPE_TRUE = 11 - TYPE_FALSE = 12 - TYPE_ASGN = 13 - TYPE_EXPR = 14 - TYPE_REF = 15 - TYPE_FUNC = 16 - TYPE_CONST_FROM = 17 - - attr_reader :type, :name, :message - - def initialize(type, name, message) - @type = type - @name = name - @message = message - end - - def disasm(fmt) - type_name = - case type - when TYPE_NIL - "nil" - when TYPE_IVAR - "ivar" - when TYPE_LVAR - "lvar" - when TYPE_GVAR - "gvar" - when TYPE_CVAR - "cvar" - when TYPE_CONST - "const" - when TYPE_METHOD - "method" - when TYPE_YIELD - "yield" - when TYPE_ZSUPER - "zsuper" - when TYPE_SELF - "self" - when TYPE_TRUE - "true" - when TYPE_FALSE - "false" - when TYPE_ASGN - "asgn" - when TYPE_EXPR - "expr" - when TYPE_REF - "ref" - when TYPE_FUNC - "func" - when TYPE_CONST_FROM - "constant-from" - end - - fmt.instruction( - "defined", - [type_name, fmt.object(name), fmt.object(message)] - ) - end - - def to_a(_iseq) - [:defined, type, name, message] - end - - def length - 4 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - object = vm.pop - - result = - case type - when TYPE_NIL, TYPE_SELF, TYPE_TRUE, TYPE_FALSE, TYPE_ASGN, TYPE_EXPR - message - when TYPE_IVAR - message if vm._self.instance_variable_defined?(name) - when TYPE_LVAR - raise NotImplementedError, "defined TYPE_LVAR" - when TYPE_GVAR - message if global_variables.include?(name) - when TYPE_CVAR - clazz = vm._self - clazz = clazz.singleton_class unless clazz.is_a?(Module) - message if clazz.class_variable_defined?(name) - when TYPE_CONST - raise NotImplementedError, "defined TYPE_CONST" - when TYPE_METHOD - raise NotImplementedError, "defined TYPE_METHOD" - when 
TYPE_YIELD - raise NotImplementedError, "defined TYPE_YIELD" - when TYPE_ZSUPER - raise NotImplementedError, "defined TYPE_ZSUPER" - when TYPE_REF - raise NotImplementedError, "defined TYPE_REF" - when TYPE_FUNC - message if object.respond_to?(name, true) - when TYPE_CONST_FROM - raise NotImplementedError, "defined TYPE_CONST_FROM" - end - - vm.push(result) - end - end - - # ### Summary - # - # `definemethod` defines a method on the class of the current value of - # `self`. It accepts two arguments. The first is the name of the method - # being defined. The second is the instruction sequence representing the - # body of the method. - # - # ### Usage - # - # ~~~ruby - # def value = "value" - # ~~~ - # - class DefineMethod - attr_reader :method_name, :method_iseq - - def initialize(method_name, method_iseq) - @method_name = method_name - @method_iseq = method_iseq - end - - def disasm(fmt) - fmt.enqueue(method_iseq) - fmt.instruction( - "definemethod", - [fmt.object(method_name), method_iseq.name] - ) - end - - def to_a(_iseq) - [:definemethod, method_name, method_iseq.to_a] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - name = method_name - iseq = method_iseq - - vm - ._self - .__send__(:define_method, name) do |*args, **kwargs, &block| - vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) - end - end - end - - # ### Summary - # - # `definesmethod` defines a method on the singleton class of the current - # value of `self`. It accepts two arguments. The first is the name of the - # method being defined. The second is the instruction sequence representing - # the body of the method. It pops the object off the stack that the method - # should be defined on. 
- # - # ### Usage - # - # ~~~ruby - # def self.value = "value" - # ~~~ - # - class DefineSMethod - attr_reader :method_name, :method_iseq - - def initialize(method_name, method_iseq) - @method_name = method_name - @method_iseq = method_iseq - end - - def disasm(fmt) - fmt.enqueue(method_iseq) - fmt.instruction( - "definesmethod", - [fmt.object(method_name), method_iseq.name] - ) - end - - def to_a(_iseq) - [:definesmethod, method_name, method_iseq.to_a] - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - name = method_name - iseq = method_iseq - - vm - ._self - .__send__(:define_singleton_method, name) do |*args, **kwargs, &block| - vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) - end - end - end - - # ### Summary - # - # `dup` copies the top value of the stack and pushes it onto the stack. - # - # ### Usage - # - # ~~~ruby - # $global = 5 - # ~~~ - # - class Dup - def disasm(fmt) - fmt.instruction("dup") - end - - def to_a(_iseq) - [:dup] - end - - def length - 1 - end - - def pops - 1 - end - - def pushes - 2 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.stack.last.dup) - end - end - - # ### Summary - # - # `duparray` dups an Array literal and pushes it onto the stack. - # - # ### Usage - # - # ~~~ruby - # [true] - # ~~~ - # - class DupArray - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("duparray", [fmt.object(object)]) - end - - def to_a(_iseq) - [:duparray, object] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(object.dup) - end - end - - # ### Summary - # - # `duphash` dups a Hash literal and pushes it onto the stack. 
- # - # ### Usage - # - # ~~~ruby - # { a: 1 } - # ~~~ - # - class DupHash - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("duphash", [fmt.object(object)]) - end - - def to_a(_iseq) - [:duphash, object] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(object.dup) - end - end - - # ### Summary - # - # `dupn` duplicates the top `n` stack elements. - # - # ### Usage - # - # ~~~ruby - # Object::X ||= true - # ~~~ - # - class DupN - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("dupn", [fmt.object(number)]) - end - - def to_a(_iseq) - [:dupn, number] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - number - end - - def canonical - self - end - - def call(vm) - values = vm.pop(number) - vm.push(*values) - vm.push(*values) - end - end - - # ### Summary - # - # `expandarray` looks at the top of the stack, and if the value is an array - # it replaces it on the stack with `number` elements of the array, or `nil` - # if the elements are missing. - # - # ### Usage - # - # ~~~ruby - # x, = [true, false, nil] - # ~~~ - # - class ExpandArray - attr_reader :number, :flags - - def initialize(number, flags) - @number = number - @flags = flags - end - - def disasm(fmt) - fmt.instruction("expandarray", [fmt.object(number), fmt.object(flags)]) - end - - def to_a(_iseq) - [:expandarray, number, flags] - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - number - end - - def canonical - self - end - - def call(vm) - raise NotImplementedError, "expandarray" - end - end - - # ### Summary - # - # `getblockparam` is a similar instruction to `getlocal` in that it looks - # for a local variable in the current instruction sequence's local table and - # walks recursively up the parent instruction sequences until it finds it. 
- # The local it retrieves, however, is a special block local that was passed - # to the current method. It pushes the value of the block local onto the - # stack. - # - # ### Usage - # - # ~~~ruby - # def foo(&block) - # block - # end - # ~~~ - # - class GetBlockParam - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction("getblockparam", [fmt.local(index, explicit: level)]) - end - - def to_a(iseq) - current = iseq - level.times { current = iseq.parent_iseq } - [:getblockparam, current.local_table.offset(index), level] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.local_get(index, level)) - end - end - - # ### Summary - # - # `getblockparamproxy` is almost the same as `getblockparam` except that it - # pushes a proxy object onto the stack instead of the actual value of the - # block local. This is used when a method is being called on the block - # local. - # - # ### Usage - # - # ~~~ruby - # def foo(&block) - # block.call - # end - # ~~~ - # - class GetBlockParamProxy - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction( - "getblockparamproxy", - [fmt.local(index, explicit: level)] - ) - end - - def to_a(iseq) - current = iseq - level.times { current = iseq.parent_iseq } - [:getblockparamproxy, current.local_table.offset(index), level] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.local_get(index, level)) - end - end - - # ### Summary - # - # `getclassvariable` looks for a class variable in the current class and - # pushes its value onto the stack. It uses an inline cache to reduce the - # need to lookup the class variable in the class hierarchy every time. 
- # - # ### Usage - # - # ~~~ruby - # @@class_variable - # ~~~ - # - class GetClassVariable - attr_reader :name, :cache - - def initialize(name, cache) - @name = name - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "getclassvariable", - [fmt.object(name), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:getclassvariable, name, cache] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - clazz = vm._self - clazz = clazz.class unless clazz.is_a?(Class) - vm.push(clazz.class_variable_get(name)) - end - end - - # ### Summary - # - # `getconstant` performs a constant lookup and pushes the value of the - # constant onto the stack. It pops both the class it should look in and - # whether or not it should look globally as well. - # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class GetConstant - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("getconstant", [fmt.object(name)]) - end - - def to_a(_iseq) - [:getconstant, name] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - # const_base, allow_nil = - vm.pop(2) - - vm.frame.nesting.reverse_each do |clazz| - if clazz.const_defined?(name) - vm.push(clazz.const_get(name)) - return - end - end - - raise NameError, "uninitialized constant #{name}" - end - end - - # ### Summary - # - # `getglobal` pushes the value of a global variables onto the stack. 
- # - # ### Usage - # - # ~~~ruby - # $$ - # ~~~ - # - class GetGlobal - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("getglobal", [fmt.object(name)]) - end - - def to_a(_iseq) - [:getglobal, name] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - # Evaluating the name of the global variable because there isn't a - # reflection API for global variables. - vm.push(eval(name.to_s, binding, __FILE__, __LINE__)) - end - end - - # ### Summary - # - # `getinstancevariable` pushes the value of an instance variable onto the - # stack. It uses an inline cache to avoid having to look up the instance - # variable in the class hierarchy every time. - # - # This instruction has two forms, but both have the same structure. Before - # Ruby 3.2, the inline cache corresponded to both the get and set - # instructions and could be shared. Since Ruby 3.2, it uses object shapes - # instead so the caches are unique per instruction. - # - # ### Usage - # - # ~~~ruby - # @instance_variable - # ~~~ - # - class GetInstanceVariable - attr_reader :name, :cache - - def initialize(name, cache) - @name = name - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "getinstancevariable", - [fmt.object(name), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:getinstancevariable, name, cache] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - method = Object.instance_method(:instance_variable_get) - vm.push(method.bind(vm._self).call(name)) - end - end - - # ### Summary - # - # `getlocal` fetches the value of a local variable from a frame determined - # by the level and index arguments. The level is the number of frames back - # to look and the index is the index in the local table. It pushes the value - # it finds onto the stack. 
- # - # ### Usage - # - # ~~~ruby - # value = 5 - # tap { tap { value } } - # ~~~ - # - class GetLocal - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction("getlocal", [fmt.local(index, explicit: level)]) - end - - def to_a(iseq) - current = iseq - level.times { current = current.parent_iseq } - [:getlocal, current.local_table.offset(index), level] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.local_get(index, level)) - end - end - - # ### Summary - # - # `getlocal_WC_0` is a specialized version of the `getlocal` instruction. It - # fetches the value of a local variable from the current frame determined by - # the index given as its only argument. - # - # ### Usage - # - # ~~~ruby - # value = 5 - # value - # ~~~ - # - class GetLocalWC0 - attr_reader :index - - def initialize(index) - @index = index - end - - def disasm(fmt) - fmt.instruction("getlocal_WC_0", [fmt.local(index, implicit: 0)]) - end - - def to_a(iseq) - [:getlocal_WC_0, iseq.local_table.offset(index)] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - GetLocal.new(index, 0) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `getlocal_WC_1` is a specialized version of the `getlocal` instruction. It - # fetches the value of a local variable from the parent frame determined by - # the index given as its only argument. 
- # - # ### Usage - # - # ~~~ruby - # value = 5 - # self.then { value } - # ~~~ - # - class GetLocalWC1 - attr_reader :index - - def initialize(index) - @index = index - end - - def disasm(fmt) - fmt.instruction("getlocal_WC_1", [fmt.local(index, implicit: 1)]) - end - - def to_a(iseq) - [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - GetLocal.new(index, 1) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `getspecial` pushes the value of a special local variable onto the stack. - # - # ### Usage - # - # ~~~ruby - # 1 if (a == 1) .. (b == 2) - # ~~~ - # - class GetSpecial - SVAR_LASTLINE = 0 # $_ - SVAR_BACKREF = 1 # $~ - SVAR_FLIPFLOP_START = 2 # flipflop - - attr_reader :key, :type - - def initialize(key, type) - @key = key - @type = type - end - - def disasm(fmt) - fmt.instruction("getspecial", [fmt.object(key), fmt.object(type)]) - end - - def to_a(_iseq) - [:getspecial, key, type] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - case key - when SVAR_LASTLINE - raise NotImplementedError, "getspecial SVAR_LASTLINE" - when SVAR_BACKREF - raise NotImplementedError, "getspecial SVAR_BACKREF" - when SVAR_FLIPFLOP_START - vm.frame_svar.svars[SVAR_FLIPFLOP_START] - end - end - end - - # ### Summary - # - # `intern` converts the top element of the stack to a symbol and pushes the - # symbol onto the stack. - # - # ### Usage - # - # ~~~ruby - # :"#{"foo"}" - # ~~~ - # - class Intern - def disasm(fmt) - fmt.instruction("intern") - end - - def to_a(_iseq) - [:intern] - end - - def length - 1 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop.to_sym) - end - end - - # ### Summary - # - # `invokeblock` invokes the block given to the current method. 
It pops the - # arguments for the block off the stack and pushes the result of running the - # block onto the stack. - # - # ### Usage - # - # ~~~ruby - # def foo - # yield - # end - # ~~~ - # - class InvokeBlock - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("invokeblock", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:invokeblock, calldata.to_h] - end - - def length - 2 - end - - def pops - calldata.argc - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc))) - end - end - - # ### Summary - # - # `invokesuper` is similar to the `send` instruction, except that it calls - # the super method. It pops the receiver and arguments off the stack and - # pushes the return value onto the stack. - # - # ### Usage - # - # ~~~ruby - # def foo - # super - # end - # ~~~ - # - class InvokeSuper - attr_reader :calldata, :block_iseq - - def initialize(calldata, block_iseq) - @calldata = calldata - @block_iseq = block_iseq - end - - def disasm(fmt) - fmt.enqueue(block_iseq) if block_iseq - fmt.instruction( - "invokesuper", - [fmt.calldata(calldata), block_iseq&.name || "nil"] - ) - end - - def to_a(_iseq) - [:invokesuper, calldata.to_h, block_iseq&.to_a] - end - - def length - 1 - end - - def pops - argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 
1 : 0) - argb + calldata.argc + 1 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - block = - if (iseq = block_iseq) - ->(*args, **kwargs, &blk) do - vm.run_block_frame(iseq, *args, **kwargs, &blk) - end - end - - keywords = - if calldata.kw_arg - calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h - else - {} - end - - arguments = vm.pop(calldata.argc) - receiver = vm.pop - - method = receiver.method(vm.frame.name).super_method - vm.push(method.call(*arguments, **keywords, &block)) - end - end - - # ### Summary - # - # `jump` unconditionally jumps to the label given as its only argument. - # - # ### Usage - # - # ~~~ruby - # x = 0 - # if x == 0 - # puts "0" - # else - # puts "2" - # end - # ~~~ - # - class Jump - attr_reader :label - - def initialize(label) - @label = label - end - - def disasm(fmt) - fmt.instruction("jump", [fmt.label(label)]) - end - - def to_a(_iseq) - [:jump, label.name] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - vm.jump(label) - end - end - - # ### Summary - # - # `leave` exits the current frame. - # - # ### Usage - # - # ~~~ruby - # ;; - # ~~~ - # - class Leave - def disasm(fmt) - fmt.instruction("leave") - end - - def to_a(_iseq) - [:leave] - end - - def length - 1 - end - - def pops - 1 - end - - def pushes - # TODO: This is wrong. It should be 1. But it's 0 for now because - # otherwise the stack size is incorrectly calculated. - 0 - end - - def canonical - self - end - - def call(vm) - vm.leave - end - end - - # ### Summary - # - # `newarray` puts a new array initialized with `number` values from the - # stack. It pops `number` values off the stack and pushes the array onto the - # stack. 
- # - # ### Usage - # - # ~~~ruby - # ["string"] - # ~~~ - # - class NewArray - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("newarray", [fmt.object(number)]) - end - - def to_a(_iseq) - [:newarray, number] - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop(number)) - end - end - - # ### Summary - # - # `newarraykwsplat` is a specialized version of `newarray` that takes a ** - # splat argument. It pops `number` values off the stack and pushes the array - # onto the stack. - # - # ### Usage - # - # ~~~ruby - # ["string", **{ foo: "bar" }] - # ~~~ - # - class NewArrayKwSplat - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("newarraykwsplat", [fmt.object(number)]) - end - - def to_a(_iseq) - [:newarraykwsplat, number] - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop(number)) - end - end - - # ### Summary - # - # `newhash` puts a new hash onto the stack, using `number` elements from the - # stack. `number` needs to be even. It pops `number` elements off the stack - # and pushes a hash onto the stack. - # - # ### Usage - # - # ~~~ruby - # def foo(key, value) - # { key => value } - # end - # ~~~ - # - class NewHash - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("newhash", [fmt.object(number)]) - end - - def to_a(_iseq) - [:newhash, number] - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop(number).each_slice(2).to_h) - end - end - - # ### Summary - # - # `newrange` creates a new range object from the top two values on the - # stack. It pops both of them off, and then pushes on the new range. 
It - # takes one argument which is 0 if the end is included or 1 if the end value - # is excluded. - # - # ### Usage - # - # ~~~ruby - # x = 0 - # y = 1 - # p (x..y), (x...y) - # ~~~ - # - class NewRange - attr_reader :exclude_end - - def initialize(exclude_end) - @exclude_end = exclude_end - end - - def disasm(fmt) - fmt.instruction("newrange", [fmt.object(exclude_end)]) - end - - def to_a(_iseq) - [:newrange, exclude_end] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(Range.new(*vm.pop(2), exclude_end == 1)) - end - end - - # ### Summary - # - # `nop` is a no-operation instruction. It is used to pad the instruction - # sequence so there is a place for other instructions to jump to. - # - # ### Usage - # - # ~~~ruby - # raise rescue true - # ~~~ - # - class Nop - def disasm(fmt) - fmt.instruction("nop") - end - - def to_a(_iseq) - [:nop] - end - - def length - 1 - end - - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - end - end - - # ### Summary - # - # `objtostring` pops a value from the stack, calls `to_s` on that value and - # then pushes the result back to the stack. - # - # It has various fast paths for classes like String, Symbol, Module, Class, - # etc. For everything else it calls `to_s`. - # - # ### Usage - # - # ~~~ruby - # "#{5}" - # ~~~ - # - class ObjToString - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("objtostring", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:objtostring, calldata.to_h] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop.to_s) - end - end - - # ### Summary - # - # `once` is an instruction that wraps an instruction sequence and ensures - # that is it only ever executed once for the lifetime of the program. 
It - # uses a cache to ensure that it is only executed once. It pushes the result - # of running the instruction sequence onto the stack. - # - # ### Usage - # - # ~~~ruby - # END { puts "END" } - # ~~~ - # - class Once - attr_reader :iseq, :cache - - def initialize(iseq, cache) - @iseq = iseq - @cache = cache - end - - def disasm(fmt) - fmt.enqueue(iseq) - fmt.instruction("once", [iseq.name, fmt.inline_storage(cache)]) - end - - def to_a(_iseq) - [:once, iseq.to_a, cache] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - return if @executed - vm.push(vm.run_block_frame(iseq)) - @executed = true - end - end - - # ### Summary - # - # `opt_and` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `&` operator is used. There is a fast path for if - # both operands are integers. It pops both the receiver and the argument off - # the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 & 3 - # ~~~ - # - class OptAnd - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_and", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_and, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_aref` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `[]` operator is used. There are fast paths if the - # receiver is an integer, array, or hash. 
- # - # ### Usage - # - # ~~~ruby - # 7[2] - # ~~~ - # - class OptAref - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_aref", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_aref, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_aref_with` is a specialization of the `opt_aref` instruction that - # occurs when the `[]` operator is used with a string argument known at - # compile time. There are fast paths if the receiver is a hash. It pops the - # receiver off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # { 'test' => true }['test'] - # ~~~ - # - class OptArefWith - attr_reader :object, :calldata - - def initialize(object, calldata) - @object = object - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_aref_with", - [fmt.object(object), fmt.calldata(calldata)] - ) - end - - def to_a(_iseq) - [:opt_aref_with, object, calldata.to_h] - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop[object]) - end - end - - # ### Summary - # - # `opt_aset` is an instruction for setting the hash value by the key in - # the `recv[obj] = set` format. It is a specialization of the - # `opt_send_without_block` instruction. It pops the receiver, the key, and - # the value off the stack and pushes on the result. 
- # - # ### Usage - # - # ~~~ruby - # {}[:key] = value - # ~~~ - # - class OptAset - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_aset", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_aset, calldata.to_h] - end - - def length - 2 - end - - def pops - 3 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_aset_with` is an instruction for setting the hash value by the known - # string key in the `recv[obj] = set` format. It pops the receiver and the - # value off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # {}["key"] = value - # ~~~ - # - class OptAsetWith - attr_reader :object, :calldata - - def initialize(object, calldata) - @object = object - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_aset_with", - [fmt.object(object), fmt.calldata(calldata)] - ) - end - - def to_a(_iseq) - [:opt_aset_with, object, calldata.to_h] - end - - def length - 3 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - hash, value = vm.pop(2) - vm.push(hash[object] = value) - end - end - - # ### Summary - # - # `opt_case_dispatch` is a branch instruction that moves the control flow - # for case statements that have clauses where they can all be used as hash - # keys for an internal hash. - # - # It has two arguments: the `case_dispatch_hash` and an `else_label`. It - # pops one value off the stack: a hash key. `opt_case_dispatch` looks up the - # key in the `case_dispatch_hash` and jumps to the corresponding label if - # there is one. If there is no value in the `case_dispatch_hash`, - # `opt_case_dispatch` jumps to the `else_label` index. 
- # - # ### Usage - # - # ~~~ruby - # case 1 - # when 1 - # puts "foo" - # else - # puts "bar" - # end - # ~~~ - # - class OptCaseDispatch - attr_reader :case_dispatch_hash, :else_label - - def initialize(case_dispatch_hash, else_label) - @case_dispatch_hash = case_dispatch_hash - @else_label = else_label - end - - def disasm(fmt) - fmt.instruction( - "opt_case_dispatch", - ["", fmt.label(else_label)] - ) - end - - def to_a(_iseq) - [ - :opt_case_dispatch, - case_dispatch_hash.flat_map { |key, value| [key, value.name] }, - else_label.name - ] - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) - end - end - - # ### Summary - # - # `opt_div` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `/` operator is used. There are fast paths for if - # both operands are integers, or if both operands are floats. It pops both - # the receiver and the argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 / 3 - # ~~~ - # - class OptDiv - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_div", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_div, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_empty_p` is an optimization applied when the method `empty?` is - # called. It pops the receiver off the stack and pushes on the result of the - # method call. - # - # ### Usage - # - # ~~~ruby - # "".empty? 
- # ~~~ - # - class OptEmptyP - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_empty_p", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_empty_p, calldata.to_h] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_eq` is a specialization of the `opt_send_without_block` instruction - # that occurs when the == operator is used. Fast paths exist when both - # operands are integers, floats, symbols or strings. It pops both the - # receiver and the argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 == 2 - # ~~~ - # - class OptEq - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_eq", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_eq, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_ge` is a specialization of the `opt_send_without_block` instruction - # that occurs when the >= operator is used. Fast paths exist when both - # operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. 
- # - # ### Usage - # - # ~~~ruby - # 4 >= 3 - # ~~~ - # - class OptGE - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_ge", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_ge, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_getconstant_path` performs a constant lookup on a chain of constant - # names. It accepts as its argument an array of constant names, and pushes - # the value of the constant onto the stack. - # - # ### Usage - # - # ~~~ruby - # ::Object - # ~~~ - # - class OptGetConstantPath - attr_reader :names - - def initialize(names) - @names = names - end - - def disasm(fmt) - cache = "" - fmt.instruction("opt_getconstant_path", [cache]) - end - - def to_a(_iseq) - [:opt_getconstant_path, names] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - current = vm._self - current = current.class unless current.is_a?(Class) - - names.each do |name| - current = name == :"" ? Object : current.const_get(name) - end - - vm.push(current) - end - end - - # ### Summary - # - # `opt_gt` is a specialization of the `opt_send_without_block` instruction - # that occurs when the > operator is used. Fast paths exist when both - # operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. 
- # - # ### Usage - # - # ~~~ruby - # 4 > 3 - # ~~~ - # - class OptGT - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_gt", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_gt, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_le` is a specialization of the `opt_send_without_block` instruction - # that occurs when the <= operator is used. Fast paths exist when both - # operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 3 <= 4 - # ~~~ - # - class OptLE - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_le", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_le, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_length` is a specialization of `opt_send_without_block`, when the - # `length` method is called. There are fast paths when the receiver is - # either a string, hash, or array. It pops the receiver off the stack and - # pushes on the result of the method call. 
- # - # ### Usage - # - # ~~~ruby - # "".length - # ~~~ - # - class OptLength - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_length", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_length, calldata.to_h] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_lt` is a specialization of the `opt_send_without_block` instruction - # that occurs when the < operator is used. Fast paths exist when both - # operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 3 < 4 - # ~~~ - # - class OptLT - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_lt", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_lt, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_ltlt` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `<<` operator is used. Fast paths exists when the - # receiver is either a String or an Array. It pops both the receiver and the - # argument off the stack and pushes on the result. 
- # - # ### Usage - # - # ~~~ruby - # "" << 2 - # ~~~ - # - class OptLTLT - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_ltlt", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_ltlt, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_minus` is a specialization of the `opt_send_without_block` - # instruction that occurs when the `-` operator is used. There are fast - # paths for if both operands are integers or if both operands are floats. It - # pops both the receiver and the argument off the stack and pushes on the - # result. - # - # ### Usage - # - # ~~~ruby - # 3 - 2 - # ~~~ - # - class OptMinus - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_minus", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_minus, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_mod` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `%` operator is used. There are fast paths for if - # both operands are integers or if both operands are floats. It pops both - # the receiver and the argument off the stack and pushes on the result. 
- # - # ### Usage - # - # ~~~ruby - # 4 % 2 - # ~~~ - # - class OptMod - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_mod", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_mod, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_mult` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `*` operator is used. There are fast paths for if - # both operands are integers or floats. It pops both the receiver and the - # argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 3 * 2 - # ~~~ - # - class OptMult - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_mult", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_mult, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_neq` is an optimization that tests whether two values at the top of - # the stack are not equal by testing their equality and calling the `!` on - # the result. This allows `opt_neq` to use the fast paths optimized in - # `opt_eq` when both operands are Integers, Floats, Symbols, or Strings. It - # pops both the receiver and the argument off the stack and pushes on the - # result. 
- # - # ### Usage - # - # ~~~ruby - # 2 != 2 - # ~~~ - # - class OptNEq - attr_reader :eq_calldata, :neq_calldata - - def initialize(eq_calldata, neq_calldata) - @eq_calldata = eq_calldata - @neq_calldata = neq_calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_neq", - [fmt.calldata(eq_calldata), fmt.calldata(neq_calldata)] - ) - end - - def to_a(_iseq) - [:opt_neq, eq_calldata.to_h, neq_calldata.to_h] - end - - def length - 3 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - receiver, argument = vm.pop(2) - vm.push(receiver != argument) - end - end - - # ### Summary - # - # `opt_newarray_max` is a specialization that occurs when the `max` method - # is called on an array literal. It pops the values of the array off the - # stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # [a, b, c].max - # ~~~ - # - class OptNewArrayMax - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("opt_newarray_max", [fmt.object(number)]) - end - - def to_a(_iseq) - [:opt_newarray_max, number] - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop(number).max) - end - end - - # ### Summary - # - # `opt_newarray_min` is a specialization that occurs when the `min` method - # is called on an array literal. It pops the values of the array off the - # stack and pushes on the result. 
- # - # ### Usage - # - # ~~~ruby - # [a, b, c].min - # ~~~ - # - class OptNewArrayMin - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("opt_newarray_min", [fmt.object(number)]) - end - - def to_a(_iseq) - [:opt_newarray_min, number] - end - - def length - 2 - end - - def pops - number - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.pop(number).min) - end - end - - # ### Summary - # - # `opt_nil_p` is an optimization applied when the method `nil?` is called. - # It returns true immediately when the receiver is `nil` and defers to the - # `nil?` method in other cases. It pops the receiver off the stack and - # pushes on the result. - # - # ### Usage - # - # ~~~ruby - # "".nil? - # ~~~ - # - class OptNilP - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_nil_p", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_nil_p, calldata.to_h] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_not` negates the value on top of the stack by calling the `!` method - # on it. It pops the receiver off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # !true - # ~~~ - # - class OptNot - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_not", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_not, calldata.to_h] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_or` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `|` operator is used. 
There is a fast path for if - # both operands are integers. It pops both the receiver and the argument off - # the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 | 3 - # ~~~ - # - class OptOr - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_or", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_or, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_plus` is a specialization of the `opt_send_without_block` instruction - # that occurs when the `+` operator is used. There are fast paths for if - # both operands are integers, floats, strings, or arrays. It pops both the - # receiver and the argument off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # 2 + 3 - # ~~~ - # - class OptPlus - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_plus", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_plus, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_regexpmatch2` is a specialization of the `opt_send_without_block` - # instruction that occurs when the `=~` operator is used. It pops both the - # receiver and the argument off the stack and pushes on the result. 
- # - # ### Usage - # - # ~~~ruby - # /a/ =~ "a" - # ~~~ - # - class OptRegExpMatch2 - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_regexpmatch2", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_regexpmatch2, calldata.to_h] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_send_without_block` is a specialization of the send instruction that - # occurs when a method is being called without a block. It pops the receiver - # and the arguments off the stack and pushes on the result. - # - # ### Usage - # - # ~~~ruby - # puts "Hello, world!" - # ~~~ - # - class OptSendWithoutBlock - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_send_without_block", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_send_without_block, calldata.to_h] - end - - def length - 2 - end - - def pops - 1 + calldata.argc - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_size` is a specialization of `opt_send_without_block`, when the - # `size` method is called. There are fast paths when the receiver is either - # a string, hash, or array. It pops the receiver off the stack and pushes on - # the result. 
- # - # ### Usage - # - # ~~~ruby - # "".size - # ~~~ - # - class OptSize - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_size", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_size, calldata.to_h] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `opt_str_freeze` pushes a frozen known string value with no interpolation - # onto the stack using the #freeze method. If the method gets overridden, - # this will fall back to a send. - # - # ### Usage - # - # ~~~ruby - # "hello".freeze - # ~~~ - # - class OptStrFreeze - attr_reader :object, :calldata - - def initialize(object, calldata) - @object = object - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_str_freeze", - [fmt.object(object), fmt.calldata(calldata)] - ) - end - - def to_a(_iseq) - [:opt_str_freeze, object, calldata.to_h] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(object.freeze) - end - end - - # ### Summary - # - # `opt_str_uminus` pushes a frozen known string value with no interpolation - # onto the stack. If the method gets overridden, this will fall back to a - # send. 
- # - # ### Usage - # - # ~~~ruby - # -"string" - # ~~~ - # - class OptStrUMinus - attr_reader :object, :calldata - - def initialize(object, calldata) - @object = object - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction( - "opt_str_uminus", - [fmt.object(object), fmt.calldata(calldata)] - ) - end - - def to_a(_iseq) - [:opt_str_uminus, object, calldata.to_h] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(-object) - end - end - - # ### Summary - # - # `opt_succ` is a specialization of the `opt_send_without_block` instruction - # when the method being called is `succ`. Fast paths exist when the receiver - # is either a String or a Fixnum. It pops the receiver off the stack and - # pushes on the result. - # - # ### Usage - # - # ~~~ruby - # "".succ - # ~~~ - # - class OptSucc - attr_reader :calldata - - def initialize(calldata) - @calldata = calldata - end - - def disasm(fmt) - fmt.instruction("opt_succ", [fmt.calldata(calldata)]) - end - - def to_a(_iseq) - [:opt_succ, calldata.to_h] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - Send.new(calldata, nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `pop` pops the top value off the stack. - # - # ### Usage - # - # ~~~ruby - # a ||= 2 - # ~~~ - # - class Pop - def disasm(fmt) - fmt.instruction("pop") - end - - def to_a(_iseq) - [:pop] - end - - def length - 1 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - vm.pop - end - end - - # ### Summary - # - # `putnil` pushes a global nil object onto the stack. 
- # - # ### Usage - # - # ~~~ruby - # nil - # ~~~ - # - class PutNil - def disasm(fmt) - fmt.instruction("putnil") - end - - def to_a(_iseq) - [:putnil] - end - - def length - 1 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - PutObject.new(nil) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `putobject` pushes a known value onto the stack. - # - # ### Usage - # - # ~~~ruby - # 5 - # ~~~ - # - class PutObject - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("putobject", [fmt.object(object)]) - end - - def to_a(_iseq) - [:putobject, object] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(object) - end - end - - # ### Summary - # - # `putobject_INT2FIX_0_` pushes 0 on the stack. It is a specialized - # instruction resulting from the operand unification optimization. It is - # equivalent to `putobject 0`. - # - # ### Usage - # - # ~~~ruby - # 0 - # ~~~ - # - class PutObjectInt2Fix0 - def disasm(fmt) - fmt.instruction("putobject_INT2FIX_0_") - end - - def to_a(_iseq) - [:putobject_INT2FIX_0_] - end - - def length - 1 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - PutObject.new(0) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `putobject_INT2FIX_1_` pushes 1 on the stack. It is a specialized - # instruction resulting from the operand unification optimization. It is - # equivalent to `putobject 1`. 
- # - # ### Usage - # - # ~~~ruby - # 1 - # ~~~ - # - class PutObjectInt2Fix1 - def disasm(fmt) - fmt.instruction("putobject_INT2FIX_1_") - end - - def to_a(_iseq) - [:putobject_INT2FIX_1_] - end - - def length - 1 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - PutObject.new(1) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `putself` pushes the current value of self onto the stack. - # - # ### Usage - # - # ~~~ruby - # puts "Hello, world!" - # ~~~ - # - class PutSelf - def disasm(fmt) - fmt.instruction("putself") - end - - def to_a(_iseq) - [:putself] - end - - def length - 1 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm._self) - end - end - - # ### Summary - # - # `putspecialobject` pushes one of three special objects onto the stack. - # These are either the VM core special object, the class base special - # object, or the constant base special object. - # - # ### Usage - # - # ~~~ruby - # alias foo bar - # ~~~ - # - class PutSpecialObject - OBJECT_VMCORE = 1 - OBJECT_CBASE = 2 - OBJECT_CONST_BASE = 3 - - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("putspecialobject", [fmt.object(object)]) - end - - def to_a(_iseq) - [:putspecialobject, object] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - case object - when OBJECT_VMCORE - vm.push(vm.frozen_core) - when OBJECT_CBASE - value = vm._self - value = value.singleton_class unless value.is_a?(Class) - vm.push(value) - when OBJECT_CONST_BASE - vm.push(vm.const_base) - end - end - end - - # ### Summary - # - # `putstring` pushes an unfrozen string literal onto the stack. 
- # - # ### Usage - # - # ~~~ruby - # "foo" - # ~~~ - # - class PutString - attr_reader :object - - def initialize(object) - @object = object - end - - def disasm(fmt) - fmt.instruction("putstring", [fmt.object(object)]) - end - - def to_a(_iseq) - [:putstring, object] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(object.dup) - end - end - - # ### Summary - # - # `send` invokes a method with an optional block. It pops its receiver and - # the arguments for the method off the stack and pushes the return value - # onto the stack. It has two arguments: the calldata for the call site and - # the optional block instruction sequence. - # - # ### Usage - # - # ~~~ruby - # "hello".tap { |i| p i } - # ~~~ - # - class Send - attr_reader :calldata, :block_iseq - - def initialize(calldata, block_iseq) - @calldata = calldata - @block_iseq = block_iseq - end - - def disasm(fmt) - fmt.enqueue(block_iseq) if block_iseq - fmt.instruction( - "send", - [fmt.calldata(calldata), block_iseq&.name || "nil"] - ) - end - - def to_a(_iseq) - [:send, calldata.to_h, block_iseq&.to_a] - end - - def length - 3 - end - - def pops - argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) - argb + calldata.argc + 1 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - block = - if (iseq = block_iseq) - ->(*args, **kwargs, &blk) do - vm.run_block_frame(iseq, *args, **kwargs, &blk) - end - end - - keywords = - if calldata.kw_arg - calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h - else - {} - end - - arguments = vm.pop(calldata.argc) - receiver = vm.pop - - vm.push( - receiver.__send__(calldata.method, *arguments, **keywords, &block) - ) - end - end - - # ### Summary - # - # `setblockparam` sets the value of a block local variable on a frame - # determined by the level and index arguments. 
The level is the number of - # frames back to look and the index is the index in the local table. It pops - # the value it is setting off the stack. - # - # ### Usage - # - # ~~~ruby - # def foo(&bar) - # bar = baz - # end - # ~~~ - # - class SetBlockParam - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction("setblockparam", [fmt.local(index, explicit: level)]) - end - - def to_a(iseq) - current = iseq - level.times { current = current.parent_iseq } - [:setblockparam, current.local_table.offset(index), level] - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - vm.local_set(index, level, vm.pop) - end - end - - # ### Summary - # - # `setclassvariable` looks for a class variable in the current class and - # sets its value to the value it pops off the top of the stack. It uses an - # inline cache to reduce the need to lookup the class variable in the class - # hierarchy every time. - # - # ### Usage - # - # ~~~ruby - # @@class_variable = 1 - # ~~~ - # - class SetClassVariable - attr_reader :name, :cache - - def initialize(name, cache) - @name = name - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "setclassvariable", - [fmt.object(name), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:setclassvariable, name, cache] - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - clazz = vm._self - clazz = clazz.class unless clazz.is_a?(Class) - clazz.class_variable_set(name, vm.pop) - end - end - - # ### Summary - # - # `setconstant` pops two values off the stack: the value to set the - # constant to and the constant base to set it in. 
- # - # ### Usage - # - # ~~~ruby - # Constant = 1 - # ~~~ - # - class SetConstant - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("setconstant", [fmt.object(name)]) - end - - def to_a(_iseq) - [:setconstant, name] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - value, parent = vm.pop(2) - parent.const_set(name, value) - end - end - - # ### Summary - # - # `setglobal` sets the value of a global variable to a value popped off the - # top of the stack. - # - # ### Usage - # - # ~~~ruby - # $global = 5 - # ~~~ - # - class SetGlobal - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("setglobal", [fmt.object(name)]) - end - - def to_a(_iseq) - [:setglobal, name] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - # Evaluating the name of the global variable because there isn't a - # reflection API for global variables. - eval("#{name} = vm.pop", binding, __FILE__, __LINE__) - end - end - - # ### Summary - # - # `setinstancevariable` pops a value off the top of the stack and then sets - # the instance variable associated with the instruction to that value. - # - # This instruction has two forms, but both have the same structure. Before - # Ruby 3.2, the inline cache corresponded to both the get and set - # instructions and could be shared. Since Ruby 3.2, it uses object shapes - # instead so the caches are unique per instruction. 
- # - # ### Usage - # - # ~~~ruby - # @instance_variable = 1 - # ~~~ - # - class SetInstanceVariable - attr_reader :name, :cache - - def initialize(name, cache) - @name = name - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "setinstancevariable", - [fmt.object(name), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:setinstancevariable, name, cache] - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - method = Object.instance_method(:instance_variable_set) - method.bind(vm._self).call(name, vm.pop) - end - end - - # ### Summary - # - # `setlocal` sets the value of a local variable on a frame determined by the - # level and index arguments. The level is the number of frames back to - # look and the index is the index in the local table. It pops the value it - # is setting off the stack. - # - # ### Usage - # - # ~~~ruby - # value = 5 - # tap { tap { value = 10 } } - # ~~~ - # - class SetLocal - attr_reader :index, :level - - def initialize(index, level) - @index = index - @level = level - end - - def disasm(fmt) - fmt.instruction("setlocal", [fmt.local(index, explicit: level)]) - end - - def to_a(iseq) - current = iseq - level.times { current = current.parent_iseq } - [:setlocal, current.local_table.offset(index), level] - end - - def length - 3 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - vm.local_set(index, level, vm.pop) - end - end - - # ### Summary - # - # `setlocal_WC_0` is a specialized version of the `setlocal` instruction. It - # sets the value of a local variable on the current frame to the value at - # the top of the stack as determined by the index given as its only - # argument. 
- # - # ### Usage - # - # ~~~ruby - # value = 5 - # ~~~ - # - class SetLocalWC0 - attr_reader :index - - def initialize(index) - @index = index - end - - def disasm(fmt) - fmt.instruction("setlocal_WC_0", [fmt.local(index, implicit: 0)]) - end - - def to_a(iseq) - [:setlocal_WC_0, iseq.local_table.offset(index)] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - SetLocal.new(index, 0) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `setlocal_WC_1` is a specialized version of the `setlocal` instruction. It - # sets the value of a local variable on the parent frame to the value at the - # top of the stack as determined by the index given as its only argument. - # - # ### Usage - # - # ~~~ruby - # value = 5 - # self.then { value = 10 } - # ~~~ - # - class SetLocalWC1 - attr_reader :index - - def initialize(index) - @index = index - end - - def disasm(fmt) - fmt.instruction("setlocal_WC_1", [fmt.local(index, implicit: 1)]) - end - - def to_a(iseq) - [:setlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - SetLocal.new(index, 1) - end - - def call(vm) - canonical.call(vm) - end - end - - # ### Summary - # - # `setn` sets a value in the stack to a value popped off the top of the - # stack. It then pushes that value onto the top of the stack as well. 
- # - # ### Usage - # - # ~~~ruby - # {}[:key] = 'val' - # ~~~ - # - class SetN - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("setn", [fmt.object(number)]) - end - - def to_a(_iseq) - [:setn, number] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.stack[-number - 1] = vm.stack.last - end - end - - # ### Summary - # - # `setspecial` pops a value off the top of the stack and sets a special - # local variable to that value. The special local variable is determined by - # the key given as its only argument. - # - # ### Usage - # - # ~~~ruby - # baz if (foo == 1) .. (bar == 1) - # ~~~ - # - class SetSpecial - attr_reader :key - - def initialize(key) - @key = key - end - - def disasm(fmt) - fmt.instruction("setspecial", [fmt.object(key)]) - end - - def to_a(_iseq) - [:setspecial, key] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 0 - end - - def canonical - self - end - - def call(vm) - case key - when GetSpecial::SVAR_LASTLINE - raise NotImplementedError, "svar SVAR_LASTLINE" - when GetSpecial::SVAR_BACKREF - raise NotImplementedError, "setspecial SVAR_BACKREF" - when GetSpecial::SVAR_FLIPFLOP_START - vm.frame_svar.svars[GetSpecial::SVAR_FLIPFLOP_START] - end - end - end - - # ### Summary - # - # `splatarray` coerces the array object at the top of the stack into Array - # by calling `to_a`. It pushes a duplicate of the array if there is a flag, - # and the original array if there isn't one. 
- # - # ### Usage - # - # ~~~ruby - # x = *(5) - # ~~~ - # - class SplatArray - attr_reader :flag - - def initialize(flag) - @flag = flag - end - - def disasm(fmt) - fmt.instruction("splatarray", [fmt.object(flag)]) - end - - def to_a(_iseq) - [:splatarray, flag] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(*vm.pop) - end - end - - # ### Summary - # - # `swap` swaps the top two elements in the stack. - # - # ### TracePoint - # - # `swap` does not dispatch any events. - # - # ### Usage - # - # ~~~ruby - # !!defined?([[]]) - # ~~~ - # - class Swap - def disasm(fmt) - fmt.instruction("swap") - end - - def to_a(_iseq) - [:swap] - end - - def length - 1 - end - - def pops - 2 - end - - def pushes - 2 - end - - def canonical - self - end - - def call(vm) - left, right = vm.pop(2) - vm.push(right, left) - end - end - - # ### Summary - # - # `throw` pops a value off the top of the stack and throws it. It is caught - # using the instruction sequence's (or an ancestor's) catch table. It pushes - # on the result of throwing the value. - # - # ### Usage - # - # ~~~ruby - # [1, 2, 3].map { break 2 } - # ~~~ - # - class Throw - TAG_NONE = 0x0 - TAG_RETURN = 0x1 - TAG_BREAK = 0x2 - TAG_NEXT = 0x3 - TAG_RETRY = 0x4 - TAG_REDO = 0x5 - TAG_RAISE = 0x6 - TAG_THROW = 0x7 - TAG_FATAL = 0x8 - - attr_reader :type - - def initialize(type) - @type = type - end - - def disasm(fmt) - fmt.instruction("throw", [fmt.object(type)]) - end - - def to_a(_iseq) - [:throw, type] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - raise NotImplementedError, "throw" - end - end - - # ### Summary - # - # `topn` pushes a single value onto the stack that is a copy of the value - # within the stack that is `number` of slots down from the top. 
- # - # ### Usage - # - # ~~~ruby - # case 3 - # when 1..5 - # puts "foo" - # end - # ~~~ - # - class TopN - attr_reader :number - - def initialize(number) - @number = number - end - - def disasm(fmt) - fmt.instruction("topn", [fmt.object(number)]) - end - - def to_a(_iseq) - [:topn, number] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(vm.stack[-number - 1]) - end - end - - # ### Summary - # - # `toregexp` pops a number of values off the stack, combines them into a new - # regular expression, and pushes the new regular expression onto the stack. - # - # ### Usage - # - # ~~~ruby - # /foo #{bar}/ - # ~~~ - # - class ToRegExp - attr_reader :options, :length - - def initialize(options, length) - @options = options - @length = length - end - - def disasm(fmt) - fmt.instruction("toregexp", [fmt.object(options), fmt.object(length)]) - end - - def to_a(_iseq) - [:toregexp, options, length] - end - - def pops - length - end - - def pushes - 1 - end - - def canonical - self - end - - def call(vm) - vm.push(Regexp.new(vm.pop(length).join, options)) - end - end - end -end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb deleted file mode 100644 index 30a95437..00000000 --- a/lib/syntax_tree/yarv/legacy.rb +++ /dev/null @@ -1,192 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This module contains the instructions that used to be a part of YARV but - # have been replaced or removed in more recent versions. - module Legacy - # ### Summary - # - # `getclassvariable` looks for a class variable in the current class and - # pushes its value onto the stack. 
- # - # This version of the `getclassvariable` instruction is no longer used - # since in Ruby 3.0 it gained an inline cache.` - # - # ### Usage - # - # ~~~ruby - # @@class_variable - # ~~~ - # - class GetClassVariable - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("getclassvariable", [fmt.object(name)]) - end - - def to_a(_iseq) - [:getclassvariable, name] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - end - - # ### Summary - # - # `opt_getinlinecache` is a wrapper around a series of `putobject` and - # `getconstant` instructions that allows skipping past them if the inline - # cache is currently set. It pushes the value of the cache onto the stack - # if it is set, otherwise it pushes `nil`. - # - # This instruction is no longer used since in Ruby 3.2 it was replaced by - # the consolidated `opt_getconstant_path` instruction. - # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class OptGetInlineCache - attr_reader :label, :cache - - def initialize(label, cache) - @label = label - @cache = cache - end - - def disasm(fmt) - fmt.instruction( - "opt_getinlinecache", - [fmt.label(label), fmt.inline_storage(cache)] - ) - end - - def to_a(_iseq) - [:opt_getinlinecache, label.name, cache] - end - - def length - 3 - end - - def pops - 0 - end - - def pushes - 1 - end - - def call(vm) - vm.push(nil) - end - end - - # ### Summary - # - # `opt_setinlinecache` sets an inline cache for a constant lookup. It pops - # the value it should set off the top of the stack. It then pushes that - # value back onto the top of the stack. - # - # This instruction is no longer used since in Ruby 3.2 it was replaced by - # the consolidated `opt_getconstant_path` instruction. 
- # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class OptSetInlineCache - attr_reader :cache - - def initialize(cache) - @cache = cache - end - - def disasm(fmt) - fmt.instruction("opt_setinlinecache", [fmt.inline_storage(cache)]) - end - - def to_a(_iseq) - [:opt_setinlinecache, cache] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 1 - end - - def call(vm) - vm.push(vm.pop) - end - end - - # ### Summary - # - # `setclassvariable` looks for a class variable in the current class and - # sets its value to the value it pops off the top of the stack. - # - # This version of the `setclassvariable` instruction is no longer used - # since in Ruby 3.0 it gained an inline cache. - # - # ### Usage - # - # ~~~ruby - # @@class_variable = 1 - # ~~~ - # - class SetClassVariable - attr_reader :name - - def initialize(name) - @name = name - end - - def disasm(fmt) - fmt.instruction("setclassvariable", [fmt.object(name)]) - end - - def to_a(_iseq) - [:setclassvariable, name] - end - - def length - 2 - end - - def pops - 1 - end - - def pushes - 0 - end - end - end - end -end diff --git a/lib/syntax_tree/yarv/local_table.rb b/lib/syntax_tree/yarv/local_table.rb deleted file mode 100644 index 54cc55ad..00000000 --- a/lib/syntax_tree/yarv/local_table.rb +++ /dev/null @@ -1,89 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - # This represents every local variable associated with an instruction - # sequence. There are two kinds of locals: plain locals that are what you - # expect, and block proxy locals, which represent local variables - # associated with blocks that were passed into the current instruction - # sequence. - class LocalTable - # A local representing a block passed into the current instruction - # sequence. - class BlockLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # A regular local variable. 
- class PlainLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # The result of looking up a local variable in the current local table. - class Lookup - attr_reader :local, :index, :level - - def initialize(local, index, level) - @local = local - @index = index - @level = level - end - end - - attr_reader :locals - - def initialize - @locals = [] - end - - def empty? - locals.empty? - end - - def find(name, level = 0) - index = locals.index { |local| local.name == name } - Lookup.new(locals[index], index, level) if index - end - - def has?(name) - locals.any? { |local| local.name == name } - end - - def names - locals.map(&:name) - end - - def name_at(index) - locals[index].name - end - - def size - locals.length - end - - # Add a BlockLocal to the local table. - def block(name) - locals << BlockLocal.new(name) unless has?(name) - end - - # Add a PlainLocal to the local table. - def plain(name) - locals << PlainLocal.new(name) unless has?(name) - end - - # This is the offset from the top of the stack where this local variable - # lives. 
- def offset(index) - size - (index - 3) - 1 - end - end - end -end diff --git a/syntax_tree.gemspec b/syntax_tree.gemspec index 19f4ee97..f6c4a734 100644 --- a/syntax_tree.gemspec +++ b/syntax_tree.gemspec @@ -25,7 +25,7 @@ Gem::Specification.new do |spec| spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } spec.require_paths = %w[lib] - spec.add_dependency "prettier_print", ">= 1.1.0" + spec.add_dependency "prettier_print", ">= 1.2.0" spec.add_development_dependency "bundler" spec.add_development_dependency "minitest" diff --git a/tasks/sorbet.rake b/tasks/sorbet.rake new file mode 100644 index 00000000..05f48874 --- /dev/null +++ b/tasks/sorbet.rake @@ -0,0 +1,373 @@ +# frozen_string_literal: true + +module SyntaxTree + class RBI + include DSL + + attr_reader :body, :line + + def initialize + @body = [] + @line = 1 + end + + def generate + require "syntax_tree/reflection" + + body << Comment("# typed: strict", false, location) + @line += 2 + + generate_parent + Reflection.nodes.sort.each { |(_, node)| generate_node(node) } + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const("BasicVisitor")), + nil, + BodyStmt( + Statements(generate_visitor("overridable")), + nil, + nil, + nil, + nil + ), + location + ) + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Visitor")), + ConstPathRef(VarRef(Const("SyntaxTree")), Const("BasicVisitor")), + BodyStmt(Statements(generate_visitor("override")), nil, nil, nil, nil), + location + ) + + Formatter.format(nil, Program(Statements(body))) + end + + private + + def generate_comments(comment) + comment + .lines(chomp: true) + .map { |line| Comment("# #{line}", false, location).tap { @line += 1 } } + end + + def generate_parent + attribute = Reflection.nodes[:Program].attributes[:location] + class_location = location + + node_body = generate_comments(attribute.comment) + node_body << sig_block { sig_returns { sig_type_for(attribute.type) } } + @line += 1 + + 
node_body << Command( + Ident("attr_reader"), + Args([SymbolLiteral(Ident("location"))]), + nil, + location + ) + @line += 1 + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Node")), + nil, + BodyStmt(Statements(node_body), nil, nil, nil, nil), + class_location + ) + @line += 2 + end + + def generate_node(node) + body.concat(generate_comments(node.comment)) + class_location = location + @line += 2 + + body << ClassDeclaration( + ConstPathRef(VarRef(Const("SyntaxTree")), Const(node.name.to_s)), + ConstPathRef(VarRef(Const("SyntaxTree")), Const("Node")), + BodyStmt(Statements(generate_node_body(node)), nil, nil, nil, nil), + class_location + ) + + @line += 2 + end + + def generate_node_body(node) + node_body = [] + node.attributes.sort.each do |(name, attribute)| + next if name == :location + + node_body.concat(generate_comments(attribute.comment)) + node_body << sig_block { sig_returns { sig_type_for(attribute.type) } } + @line += 1 + + node_body << Command( + Ident("attr_reader"), + Args([SymbolLiteral(Ident(attribute.name.to_s))]), + nil, + location + ) + @line += 2 + end + + node_body.concat(generate_initialize(node)) + + node_body << sig_block do + CallNode( + sig_params do + BareAssocHash( + [Assoc(Label("visitor:"), sig_type_for(BasicVisitor))] + ) + end, + Period("."), + Ident("returns"), + ArgParen( + Args( + [CallNode(VarRef(Const("T")), Period("."), Ident("untyped"), nil)] + ) + ) + ) + end + @line += 1 + + node_body << generate_def_node( + "accept", + Paren( + LParen("("), + Params.new(requireds: [Ident("visitor")], location: location) + ) + ) + @line += 2 + + node_body << generate_child_nodes + @line += 1 + + node_body << generate_def_node("child_nodes", nil) + @line += 2 + + node_body << sig_block do + CallNode( + sig_params do + BareAssocHash( + [ + Assoc( + Label("other:"), + CallNode( + VarRef(Const("T")), + Period("."), + Ident("untyped"), + nil + ) + ) + ] + ) + end, + Period("."), + sig_returns { 
ConstPathRef(VarRef(Const("T")), Const("Boolean")) }, + nil + ) + end + @line += 1 + + node_body << generate_def_node( + "==", + Paren( + LParen("("), + Params.new(location: location, requireds: [Ident("other")]) + ) + ) + @line += 2 + + node_body + end + + def generate_initialize(node) + parameters = + SyntaxTree.const_get(node.name).instance_method(:initialize).parameters + + assocs = + parameters.map do |(_, name)| + Assoc(Label("#{name}:"), sig_type_for(node.attributes[name].type)) + end + + node_body = [] + node_body << sig_block do + CallNode( + sig_params { BareAssocHash(assocs) }, + Period("."), + Ident("void"), + nil + ) + end + @line += 1 + + params = Params.new(location: location) + parameters.each do |(type, name)| + case type + when :req + params.requireds << Ident(name.to_s) + when :keyreq + params.keywords << [Label("#{name}:"), nil] + when :key + params.keywords << [ + Label("#{name}:"), + CallNode( + VarRef(Const("T")), + Period("."), + Ident("unsafe"), + ArgParen(Args([VarRef(Kw("nil"))])) + ) + ] + else + raise + end + end + + node_body << generate_def_node("initialize", Paren(LParen("("), params)) + @line += 2 + + node_body + end + + def generate_child_nodes + type = + Reflection::Type::ArrayType.new( + Reflection::Type::UnionType.new([NilClass, Node]) + ) + + sig_block { sig_returns { sig_type_for(type) } } + end + + def generate_def_node(name, params) + DefNode( + nil, + nil, + Ident(name), + params, + BodyStmt(Statements([VoidStmt()]), nil, nil, nil, nil), + location + ) + end + + def generate_visitor(override) + body = [] + + Reflection.nodes.each do |name, node| + body << sig_block do + CallNode( + CallNode( + Ident(override), + Period("."), + sig_params do + BareAssocHash( + [ + Assoc( + Label("node:"), + sig_type_for(SyntaxTree.const_get(name)) + ) + ] + ) + end, + nil + ), + Period("."), + sig_returns do + CallNode(VarRef(Const("T")), Period("."), Ident("untyped"), nil) + end, + nil + ) + end + + body << generate_def_node( + 
node.visitor_method, + Paren( + LParen("("), + Params.new(requireds: [Ident("node")], location: location) + ) + ) + + @line += 2 + end + + body + end + + def sig_block + MethodAddBlock( + CallNode(nil, nil, Ident("sig"), nil), + BlockNode( + LBrace("{"), + nil, + BodyStmt(Statements([yield]), nil, nil, nil, nil) + ), + location + ) + end + + def sig_params + CallNode(nil, nil, Ident("params"), ArgParen(Args([yield]))) + end + + def sig_returns + CallNode(nil, nil, Ident("returns"), ArgParen(Args([yield]))) + end + + def sig_type_for(type) + case type + when Reflection::Type::ArrayType + ARef( + ConstPathRef(VarRef(Const("T")), Const("Array")), + sig_type_for(type.type) + ) + when Reflection::Type::TupleType + ArrayLiteral(LBracket("["), Args(type.types.map { sig_type_for(_1) })) + when Reflection::Type::UnionType + if type.types.include?(NilClass) + selected = type.types.reject { _1 == NilClass } + subtype = + if selected.size == 1 + selected.first + else + Reflection::Type::UnionType.new(selected) + end + + CallNode( + VarRef(Const("T")), + Period("."), + Ident("nilable"), + ArgParen(Args([sig_type_for(subtype)])) + ) + else + CallNode( + VarRef(Const("T")), + Period("."), + Ident("any"), + ArgParen(Args(type.types.map { sig_type_for(_1) })) + ) + end + when Symbol + ConstRef(Const("Symbol")) + else + *parents, constant = type.name.split("::").map { Const(_1) } + + if parents.empty? 
+ ConstRef(constant) + else + [*parents[1..], constant].inject( + VarRef(parents.first) + ) { |accum, const| ConstPathRef(accum, const) } + end + end + end + + def location + Location.fixed(line: line, char: 0, column: 0) + end + end +end + +namespace :sorbet do + desc "Generate RBI files for Sorbet" + task :rbi do + puts SyntaxTree::RBI.new.generate + end +end diff --git a/test/cli_test.rb b/test/cli_test.rb index 7c9e2652..a0d6001d 100644 --- a/test/cli_test.rb +++ b/test/cli_test.rb @@ -10,6 +10,10 @@ def parse(source) source * 2 end + def format(source, _print_width, **) + "Formatted #{source}" + end + def read(filepath) File.read(filepath) end @@ -202,6 +206,28 @@ def test_multiple_inline_scripts assert_equal(["1 + 1", "2 + 2"], stdio.split("\n").sort) end + def test_format_script_with_custom_handler + SyntaxTree.register_handler(".test", TestHandler.new) + stdio, = + capture_io do + SyntaxTree::CLI.run(%w[format --extension=test -e ]) + end + assert_equal("Formatted \n", stdio) + ensure + SyntaxTree::HANDLERS.delete(".test") + end + + def test_format_stdin_with_custom_handler + SyntaxTree.register_handler(".test", TestHandler.new) + stdin = $stdin + $stdin = StringIO.new("") + stdio, = capture_io { SyntaxTree::CLI.run(%w[format --extension=test]) } + assert_equal("Formatted \n", stdio) + ensure + $stdin = stdin + SyntaxTree::HANDLERS.delete(".test") + end + def test_generic_error SyntaxTree.stub(:format, ->(*) { raise }) do result = run_cli("format") @@ -282,10 +308,52 @@ def test_plugin_args_with_config_file end end - private + def test_config_file_custom_path + with_plugin_directory do |directory| + plugin = directory.plugin("puts 'Custom config!'") + config = <<~TXT + --print-width=80 + --plugins=#{plugin} + TXT + + filepath = File.join(Dir.tmpdir, "#{SecureRandom.hex}.streerc") + with_config_file(config, filepath) do + contents = "#{"a" * 30} + #{"b" * 30}\n" + result = run_cli("format", "--config=#{filepath}", contents: contents) + + assert_equal("Custom 
config!\n#{contents}", result.stdio) + end + end + end + + def test_config_file_custom_path_space_separated + with_plugin_directory do |directory| + plugin = directory.plugin("puts 'Custom config space!'") + config = <<~TXT + --print-width=80 + --plugins=#{plugin} + TXT + + filepath = File.join(Dir.tmpdir, "#{SecureRandom.hex}.streerc") + with_config_file(config, filepath) do + contents = "#{"a" * 30} + #{"b" * 30}\n" + result = run_cli("format", "--config", filepath, contents: contents) + + assert_equal("Custom config space!\n#{contents}", result.stdio) + end + end + end + + def test_config_file_nonexistent_path + assert_raises(ArgumentError) do + run_cli("format", "--config=/nonexistent/path.streerc") + end + end Result = Struct.new(:status, :stdio, :stderr, keyword_init: true) + private + def run_cli(command, *args, contents: :default) tempfile = case contents @@ -316,8 +384,8 @@ def run_cli(command, *args, contents: :default) tempfile.unlink end - def with_config_file(contents) - filepath = File.join(Dir.pwd, SyntaxTree::CLI::ConfigFile::FILENAME) + def with_config_file(contents, filepath = nil) + filepath ||= File.join(Dir.pwd, SyntaxTree::CLI::ConfigFile::FILENAME) File.write(filepath, contents) yield diff --git a/test/compiler_test.rb b/test/compiler_test.rb deleted file mode 100644 index 1922f8c6..00000000 --- a/test/compiler_test.rb +++ /dev/null @@ -1,525 +0,0 @@ -# frozen_string_literal: true - -return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" -require_relative "test_helper" - -module SyntaxTree - class CompilerTest < Minitest::Test - CASES = [ - # Hooks - "BEGIN { a = 1 }", - "a = 1; END { a = 1 }; a", - # Various literals placed on the stack - "true", - "false", - "nil", - "self", - "0", - "1", - "2", - "1.0", - "1i", - "1r", - "1..2", - "1...2", - "(1)", - "%w[foo bar baz]", - "%W[foo bar baz]", - "%i[foo bar baz]", - "%I[foo bar baz]", - "{ foo: 1, bar: 1.0, baz: 1i }", - "'foo'", - "\"foo\"", - "\"foo\#{bar}\"", - 
"\"foo\#@bar\"", - "%q[foo]", - "%Q[foo]", - <<~RUBY, - "foo" \\ - "bar" - RUBY - <<~RUBY, - < 2", - "1 >= 2", - "1 == 2", - "1 != 2", - "1 & 2", - "1 | 2", - "1 << 2", - "1 ^ 2", - "foo.empty?", - "foo.length", - "foo.nil?", - "foo.size", - "foo.succ", - "/foo/ =~ \"foo\" && $1", - "\"foo\".freeze", - "\"foo\".freeze(1)", - "-\"foo\"", - "\"foo\".-@", - "\"foo\".-@(1)", - # Various method calls - "foo?", - "foo.bar", - "foo.bar(baz)", - "foo bar", - "foo.bar baz", - "foo(*bar)", - "foo(**bar)", - "foo(&bar)", - "foo.bar = baz", - "not foo", - "!foo", - "~foo", - "+foo", - "-foo", - "`foo`", - "`foo \#{bar} baz`", - # Local variables - "foo", - "foo = 1", - "foo = 1; bar = 2; baz = 3", - "foo = 1; foo", - "foo += 1", - "foo -= 1", - "foo *= 1", - "foo /= 1", - "foo %= 1", - "foo &= 1", - "foo |= 1", - "foo &&= 1", - "foo ||= 1", - "foo <<= 1", - "foo ^= 1", - "foo, bar = 1, 2", - "foo, bar, = 1, 2", - "foo, bar, baz = 1, 2", - "foo, bar = 1, 2, 3", - "foo = 1, 2, 3", - "foo, * = 1, 2, 3", - # Instance variables - "@foo", - "@foo = 1", - "@foo = 1; @bar = 2; @baz = 3", - "@foo = 1; @foo", - "@foo += 1", - "@foo -= 1", - "@foo *= 1", - "@foo /= 1", - "@foo %= 1", - "@foo &= 1", - "@foo |= 1", - "@foo &&= 1", - "@foo ||= 1", - "@foo <<= 1", - "@foo ^= 1", - # Class variables - "@@foo", - "@@foo = 1", - "@@foo = 1; @@bar = 2; @@baz = 3", - "@@foo = 1; @@foo", - "@@foo += 1", - "@@foo -= 1", - "@@foo *= 1", - "@@foo /= 1", - "@@foo %= 1", - "@@foo &= 1", - "@@foo |= 1", - "@@foo &&= 1", - "@@foo ||= 1", - "@@foo <<= 1", - "@@foo ^= 1", - # Global variables - "$foo", - "$foo = 1", - "$foo = 1; $bar = 2; $baz = 3", - "$foo = 1; $foo", - "$foo += 1", - "$foo -= 1", - "$foo *= 1", - "$foo /= 1", - "$foo %= 1", - "$foo &= 1", - "$foo |= 1", - "$foo &&= 1", - "$foo ||= 1", - "$foo <<= 1", - "$foo ^= 1", - # Index access - "foo[bar]", - "foo[bar] = 1", - "foo[bar] += 1", - "foo[bar] -= 1", - "foo[bar] *= 1", - "foo[bar] /= 1", - "foo[bar] %= 1", - "foo[bar] &= 1", - "foo[bar] 
|= 1", - "foo[bar] &&= 1", - "foo[bar] ||= 1", - "foo[bar] <<= 1", - "foo[bar] ^= 1", - "foo['true']", - "foo['true'] = 1", - # Constants (single) - "Foo", - "Foo = 1", - "Foo += 1", - "Foo -= 1", - "Foo *= 1", - "Foo /= 1", - "Foo %= 1", - "Foo &= 1", - "Foo |= 1", - "Foo &&= 1", - "Foo ||= 1", - "Foo <<= 1", - "Foo ^= 1", - # Constants (top) - "::Foo", - "::Foo = 1", - "::Foo += 1", - "::Foo -= 1", - "::Foo *= 1", - "::Foo /= 1", - "::Foo %= 1", - "::Foo &= 1", - "::Foo |= 1", - "::Foo &&= 1", - "::Foo ||= 1", - "::Foo <<= 1", - "::Foo ^= 1", - # Constants (nested) - "Foo::Bar::Baz", - "Foo::Bar::Baz += 1", - "Foo::Bar::Baz -= 1", - "Foo::Bar::Baz *= 1", - "Foo::Bar::Baz /= 1", - "Foo::Bar::Baz %= 1", - "Foo::Bar::Baz &= 1", - "Foo::Bar::Baz |= 1", - "Foo::Bar::Baz &&= 1", - "Foo::Bar::Baz ||= 1", - "Foo::Bar::Baz <<= 1", - "Foo::Bar::Baz ^= 1", - # Constants (top nested) - "::Foo::Bar::Baz", - "::Foo::Bar::Baz = 1", - "::Foo::Bar::Baz += 1", - "::Foo::Bar::Baz -= 1", - "::Foo::Bar::Baz *= 1", - "::Foo::Bar::Baz /= 1", - "::Foo::Bar::Baz %= 1", - "::Foo::Bar::Baz &= 1", - "::Foo::Bar::Baz |= 1", - "::Foo::Bar::Baz &&= 1", - "::Foo::Bar::Baz ||= 1", - "::Foo::Bar::Baz <<= 1", - "::Foo::Bar::Baz ^= 1", - # Constants (calls) - "Foo::Bar.baz", - "::Foo::Bar.baz", - "Foo::Bar.baz = 1", - "::Foo::Bar.baz = 1", - # Control flow - "foo&.bar", - "foo&.bar(1)", - "foo&.bar 1, 2, 3", - "foo&.bar {}", - "foo && bar", - "foo || bar", - "if foo then bar end", - "if foo then bar else baz end", - "if foo then bar elsif baz then qux end", - "foo if bar", - "unless foo then bar end", - "unless foo then bar else baz end", - "foo unless bar", - "foo while bar", - "while foo do bar end", - "foo until bar", - "until foo do bar end", - "for i in [1, 2, 3] do i end", - "foo ? bar : baz", - "case foo when bar then 1 end", - "case foo when bar then 1 else 2 end", - "baz if (foo == 1) .. 
(bar == 1)", - # Constructed values - "foo..bar", - "foo...bar", - "[1, 1.0, 1i, 1r]", - "[foo, bar, baz]", - "[@foo, @bar, @baz]", - "[@@foo, @@bar, @@baz]", - "[$foo, $bar, $baz]", - "%W[foo \#{bar} baz]", - "%I[foo \#{bar} baz]", - "[foo, bar] + [baz, qux]", - "[foo, bar, *baz, qux]", - "{ foo: bar, baz: qux }", - "{ :foo => bar, :baz => qux }", - "{ foo => bar, baz => qux }", - "%s[foo]", - "[$1, $2, $3, $4, $5, $6, $7, $8, $9]", - "/foo \#{bar} baz/", - "%r{foo \#{bar} baz}", - "[1, 2, 3].max", - "[foo, bar, baz].max", - "[foo, bar, baz].max(1)", - "[1, 2, 3].min", - "[foo, bar, baz].min", - "[foo, bar, baz].min(1)", - "[**{ x: true }][0][:x]", - # Core method calls - "alias foo bar", - "alias :foo :bar", - "super", - "super(1)", - "super(1, 2, 3)", - "undef foo", - "undef :foo", - "undef foo, bar, baz", - "undef :foo, :bar, :baz", - "def foo; yield; end", - "def foo; yield(1); end", - "def foo; yield(1, 2, 3); end", - # defined? usage - "defined?(foo)", - "defined?(\"foo\")", - "defined?(:foo)", - "defined?(@foo)", - "defined?(@@foo)", - "defined?($foo)", - "defined?(Foo)", - "defined?(yield)", - "defined?(super)", - "foo = 1; defined?(foo)", - "defined?(self)", - "defined?(true)", - "defined?(false)", - "defined?(nil)", - "defined?(foo = 1)", - # Ignored content - ";;;", - "# comment", - "=begin\nfoo\n=end", - <<~RUBY, - __END__ - RUBY - # Method definitions - "def foo; end", - "def foo(bar); end", - "def foo(bar, baz); end", - "def foo(bar = 1); end", - "def foo(bar = 1, baz = 2); end", - "def foo(*bar); end", - "def foo(bar, *baz); end", - "def foo(*bar, baz, qux); end", - "def foo(bar, *baz, qux); end", - "def foo(bar, baz, *qux, quaz); end", - "def foo(bar, baz, &qux); end", - "def foo(bar, *baz, &qux); end", - "def foo(&qux); qux; end", - "def foo(&qux); qux.call; end", - "def foo(&qux); qux = bar; end", - "def foo(bar:); end", - "def foo(bar:, baz:); end", - "def foo(bar: 1); end", - "def foo(bar: 1, baz: 2); end", - "def foo(bar: baz); end", - "def 
foo(bar: 1, baz: qux); end", - "def foo(bar: qux, baz: 1); end", - "def foo(bar: baz, qux: qaz); end", - "def foo(**rest); end", - "def foo(bar:, **rest); end", - "def foo(bar:, baz:, **rest); end", - "def foo(bar: 1, **rest); end", - "def foo(bar: 1, baz: 2, **rest); end", - "def foo(bar: baz, **rest); end", - "def foo(bar: 1, baz: qux, **rest); end", - "def foo(bar: qux, baz: 1, **rest); end", - "def foo(bar: baz, qux: qaz, **rest); end", - "def foo(...); end", - "def foo(bar, ...); end", - "def foo(...); bar(...); end", - "def foo(bar, ...); baz(1, 2, 3, ...); end", - "def self.foo; end", - "def foo.bar(baz); end", - # Class/module definitions - "module Foo; end", - "module ::Foo; end", - "module Foo::Bar; end", - "module ::Foo::Bar; end", - "module Foo; module Bar; end; end", - "class Foo; end", - "class ::Foo; end", - "class Foo::Bar; end", - "class ::Foo::Bar; end", - "class Foo; class Bar; end; end", - "class Foo < Baz; end", - "class ::Foo < Baz; end", - "class Foo::Bar < Baz; end", - "class ::Foo::Bar < Baz; end", - "class Foo; class Bar < Baz; end; end", - "class Foo < baz; end", - "class << Object; end", - "class << ::String; end", - # Block - "foo do end", - "foo {}", - "foo do |bar| end", - "foo { |bar| }", - "foo { |bar; baz| }", - "-> do end", - "-> {}", - "-> (bar) do end", - "-> (bar) {}", - "-> (bar; baz) { }", - # Pattern matching - "foo in bar", - "foo in [bar]", - "foo in [bar, baz]", - "foo in [1, 2, 3, bar, 4, 5, 6, baz]", - "foo in Foo[1, 2, 3, bar, 4, 5, 6, baz]", - "foo => bar" - ] - - # These are the combinations of instructions that we're going to test. - OPTIONS = [ - YARV::Compiler::Options.new, - YARV::Compiler::Options.new(frozen_string_literal: true), - YARV::Compiler::Options.new(operands_unification: false), - # TODO: have this work when peephole optimizations are turned off. 
- # YARV::Compiler::Options.new(peephole_optimization: false), - YARV::Compiler::Options.new(specialized_instruction: false), - YARV::Compiler::Options.new(inline_const_cache: false), - YARV::Compiler::Options.new(tailcall_optimization: true) - ] - - OPTIONS.each do |options| - suffix = options.to_hash.map { |key, value| "#{key}=#{value}" }.join("&") - - CASES.each do |source| - define_method(:"test_compiles_#{source}_(#{suffix})") do - assert_compiles(source, options) - end - - define_method(:"test_loads_#{source}_(#{suffix})") do - assert_loads(source, options) - end - - define_method(:"test_disasms_#{source}_(#{suffix})") do - assert_disasms(source, options) - end - end - end - - def test_evaluation - assert_evaluates 5, "2 + 3" - assert_evaluates 5, "a = 2; b = 3; a + b" - end - - private - - def serialize_iseq(iseq) - serialized = iseq.to_a - - serialized[4].delete(:node_id) - serialized[4].delete(:code_location) - serialized[4].delete(:node_ids) - - serialized[13] = serialized[13].filter_map do |insn| - case insn - when Array - insn.map do |operand| - if operand.is_a?(Array) && - operand[0] == YARV::InstructionSequence::MAGIC - serialize_iseq(operand) - else - operand - end - end - when Integer, :RUBY_EVENT_LINE - # ignore these for now - else - insn - end - end - - serialized - end - - # Check that the compiled instruction sequence matches the expected - # instruction sequence. - def assert_compiles(source, options) - assert_equal( - serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(YARV.compile(source, options)) - ) - end - - # Check that the compiled instruction sequence matches the instruction - # sequence created directly from the compiled instruction sequence. 
- def assert_loads(source, options) - compiled = RubyVM::InstructionSequence.compile(source, **options) - - assert_equal( - serialize_iseq(compiled), - serialize_iseq(YARV::InstructionSequence.from(compiled.to_a, options)) - ) - end - - # Check that we can successfully disasm the compiled instruction sequence. - def assert_disasms(source, options) - compiled = RubyVM::InstructionSequence.compile(source, **options) - yarv = YARV::InstructionSequence.from(compiled.to_a, options) - assert_kind_of String, yarv.disasm - end - - def assert_evaluates(expected, source) - assert_equal expected, YARV.compile(source).eval - end - end -end diff --git a/test/fixtures/arg_paren.rb b/test/fixtures/arg_paren.rb index 0e01e208..0816af6a 100644 --- a/test/fixtures/arg_paren.rb +++ b/test/fixtures/arg_paren.rb @@ -2,8 +2,6 @@ foo(bar) % foo() -- -foo % foo(barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr) - diff --git a/test/fixtures/array_literal.rb b/test/fixtures/array_literal.rb index df807728..391d2eae 100644 --- a/test/fixtures/array_literal.rb +++ b/test/fixtures/array_literal.rb @@ -24,9 +24,16 @@ - fooooooooooooooooo = 1 [ - fooooooooooooooooo, fooooooooooooooooo, fooooooooooooooooo, - fooooooooooooooooo, fooooooooooooooooo, fooooooooooooooooo, - fooooooooooooooooo, fooooooooooooooooo, fooooooooooooooooo, fooooooooooooooooo + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo, + fooooooooooooooooo ] % [ diff --git a/test/fixtures/assoc.rb b/test/fixtures/assoc.rb index 0fc60e6f..83a4887a 100644 --- a/test/fixtures/assoc.rb +++ b/test/fixtures/assoc.rb @@ -48,3 +48,7 @@ { "foo #{bar}": "baz" } % { "foo=": "baz" } +% # >= 3.1.0 +{ bar => 1, baz: } +% # >= 3.1.0 +{ baz:, bar => 1 } diff --git a/test/fixtures/binary.rb b/test/fixtures/binary.rb index f8833cdc..4cb56cbf 100644 --- a/test/fixtures/binary.rb +++ 
b/test/fixtures/binary.rb @@ -3,6 +3,11 @@ % foo << bar % +foo << barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr << barrrrrrrrrrrrr << barrrrrrrrrrrrrrrrrr +- +foo << barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr << barrrrrrrrrrrrr << + barrrrrrrrrrrrrrrrrr +% foo**bar % foo * barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr diff --git a/test/fixtures/bodystmt.rb b/test/fixtures/bodystmt.rb index 4cbb8f5e..5999fdba 100644 --- a/test/fixtures/bodystmt.rb +++ b/test/fixtures/bodystmt.rb @@ -36,6 +36,7 @@ end % begin +rescue StandardError else # else end % diff --git a/test/fixtures/break.rb b/test/fixtures/break.rb index a77c6b35..23277f6b 100644 --- a/test/fixtures/break.rb +++ b/test/fixtures/break.rb @@ -1,29 +1,45 @@ % -break +tap { break } % -break foo +tap { break foo } % -break foo, bar +tap { break foo, bar } % -break(foo) +tap { break(foo) } % -break fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo +tap { break fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo } - -break( - fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo -) +tap do + break( + fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo + ) +end % -break(fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo) +tap { break(fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo) } - -break( - fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo -) -% -break (foo), bar -% -break( - foo - bar -) -% -break foo.bar :baz do |qux| qux end +tap do + break( + fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo + ) +end +% +tap { break (foo), bar } +% +tap do + break( + foo + bar + ) +end +% +tap { break foo.bar :baz do |qux| qux end } +- +tap do + break( + foo.bar :baz do |qux| + qux + end + ) +end +% +tap { break :foo => "bar" } diff --git a/test/fixtures/call.rb b/test/fixtures/call.rb index 
c41ee4ac..eec717f0 100644 --- a/test/fixtures/call.rb +++ b/test/fixtures/call.rb @@ -60,3 +60,15 @@ % a b do end.c d +% +self. +=begin +=end + to_s +% +fooooooooooooooooooooooooooooooooooo.barrrrrrrrrrrrrrrrrrrrrrrrrrrrrr.where.not(:id).order(:id) +- +fooooooooooooooooooooooooooooooooooo + .barrrrrrrrrrrrrrrrrrrrrrrrrrrrrr + .where.not(:id) + .order(:id) diff --git a/test/fixtures/def.rb b/test/fixtures/def.rb index a827adfe..0cc49e0a 100644 --- a/test/fixtures/def.rb +++ b/test/fixtures/def.rb @@ -23,3 +23,9 @@ def foo() # comment def foo( # comment ) end +% +def +=begin +=end +a +end diff --git a/test/fixtures/def_endless.rb b/test/fixtures/def_endless.rb index 4595fba9..8d1f9d33 100644 --- a/test/fixtures/def_endless.rb +++ b/test/fixtures/def_endless.rb @@ -22,3 +22,13 @@ def self.foo = bar baz end def foo? = true +% +def a() +=begin +=end +=1 +- +def a() = +=begin +=end + 1 diff --git a/test/fixtures/hash.rb b/test/fixtures/hash.rb index 9c43a4fe..70e89f69 100644 --- a/test/fixtures/hash.rb +++ b/test/fixtures/hash.rb @@ -29,3 +29,5 @@ { # comment } +% # >= 3.1.0 +{ foo:, "bar" => "baz" } diff --git a/test/fixtures/if.rb b/test/fixtures/if.rb index cfd6a882..b25386b9 100644 --- a/test/fixtures/if.rb +++ b/test/fixtures/if.rb @@ -67,3 +67,10 @@ if true # comment1 # comment2 end +% +result = + if false && val = 1 + "A" + else + "B" + end diff --git a/test/fixtures/ifop.rb b/test/fixtures/ifop.rb index e56eb987..f7504658 100644 --- a/test/fixtures/ifop.rb +++ b/test/fixtures/ifop.rb @@ -11,8 +11,10 @@ % foo bar ? 1 : 2 % -foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo ? break : baz +tap { foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo ? break : baz } - -foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo ? - break : - baz +tap do + foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo ? 
+ break : + baz +end diff --git a/test/fixtures/lambda.rb b/test/fixtures/lambda.rb index 5dba3be3..8b922ef0 100644 --- a/test/fixtures/lambda.rb +++ b/test/fixtures/lambda.rb @@ -80,3 +80,31 @@ -> do # comment1 # comment2 end +% # multiline lambda in a command +command "arg" do + -> { + multi + line + } +end +- +command "arg" do + -> do + multi + line + end +end +% # multiline lambda in a command call +command.call "arg" do + -> { + multi + line + } +end +- +command.call "arg" do + -> do + multi + line + end +end diff --git a/test/fixtures/next.rb b/test/fixtures/next.rb index be667951..dc159488 100644 --- a/test/fixtures/next.rb +++ b/test/fixtures/next.rb @@ -1,67 +1,82 @@ % -next +tap { next } % -next foo +tap { next foo } % -next foo, bar +tap { next foo, bar } % -next(foo) +tap { next(foo) } % -next fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo +tap { next fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo } - -next( - fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo -) +tap do + next( + fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo + ) +end % -next(fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo) +tap { next(fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo) } - -next( - fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo -) -% -next (foo), bar -% -next( - foo - bar -) +tap do + next( + fooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo + ) +end +% +tap { next (foo), bar } +% +tap do + next( + foo + bar + ) +end +% +tap { next(1) } +- +tap { next 1 } % -next(1) +tap { next(1.0) } - -next 1 +tap { next 1.0 } % -next(1.0) +tap { next($a) } - -next 1.0 +tap { next $a } % -next($a) +tap { next(@@a) } - -next $a +tap { next @@a } % -next(@@a) +tap { next(self) } - -next @@a +tap { next self } % -next(self) +tap { next(@a) } - -next self +tap { 
next @a } % -next(@a) +tap { next(A) } - -next @a +tap { next A } % -next(A) +tap { next([]) } - -next A +tap { next [] } % -next([]) +tap { next([1]) } - -next [] +tap { next [1] } % -next([1]) +tap { next([1, 2]) } - -next [1] +tap { next 1, 2 } % -next([1, 2]) +tap { next fun foo do end } - -next 1, 2 +tap do + next( + fun foo do + end + ) +end diff --git a/test/fixtures/rassign.rb b/test/fixtures/rassign.rb index 3db52b18..3d357351 100644 --- a/test/fixtures/rassign.rb +++ b/test/fixtures/rassign.rb @@ -23,3 +23,9 @@ % a in Integer b => [Integer => c] +% +case [0] +when 0 + { a: 0 } => { a: } + puts a +end diff --git a/test/fixtures/redo.rb b/test/fixtures/redo.rb index 8ab087a2..962af3d0 100644 --- a/test/fixtures/redo.rb +++ b/test/fixtures/redo.rb @@ -1,4 +1,6 @@ % -redo +tap { redo } % -redo # comment +tap do + redo # comment +end diff --git a/test/fixtures/retry.rb b/test/fixtures/retry.rb index 2b14d21a..47b6be51 100644 --- a/test/fixtures/retry.rb +++ b/test/fixtures/retry.rb @@ -1,4 +1,10 @@ % -retry +begin +rescue StandardError + retry +end % -retry # comment +begin +rescue StandardError + retry # comment +end diff --git a/test/fixtures/return.rb b/test/fixtures/return.rb index 8f7d0aa3..7092464f 100644 --- a/test/fixtures/return.rb +++ b/test/fixtures/return.rb @@ -37,3 +37,5 @@ return [] % return [1] +% +return :foo => "bar" diff --git a/test/fixtures/symbols.rb b/test/fixtures/symbols.rb index 5e2673f3..12f0a22f 100644 --- a/test/fixtures/symbols.rb +++ b/test/fixtures/symbols.rb @@ -19,3 +19,8 @@ %I[foo] # comment % %I{foo[]} +% +:\ +=begin +=end +symbol diff --git a/test/fixtures/var_field_rassign.rb b/test/fixtures/var_field_rassign.rb index 3e019c5c..aa5ec379 100644 --- a/test/fixtures/var_field_rassign.rb +++ b/test/fixtures/var_field_rassign.rb @@ -1,6 +1,7 @@ % foo in bar % +bar = 1 foo in ^bar % foo in ^@bar diff --git a/test/fixtures/yield.rb b/test/fixtures/yield.rb index f3f023f8..3cf1e5f1 100644 --- a/test/fixtures/yield.rb +++ 
b/test/fixtures/yield.rb @@ -1,16 +1,30 @@ % -yield foo +def foo + yield foo +end % -yield(foo) +def foo + yield(foo) +end % -yield foo, bar +def foo + yield foo, bar +end % -yield(foo, bar) +def foo + yield(foo, bar) +end % -yield foo # comment +def foo + yield foo # comment +end % -yield(foo) # comment +def foo + yield(foo) # comment +end % -yield( # comment - foo -) +def foo + yield( # comment + foo + ) +end diff --git a/test/fixtures/yield0.rb b/test/fixtures/yield0.rb index a168c4aa..c1833bb5 100644 --- a/test/fixtures/yield0.rb +++ b/test/fixtures/yield0.rb @@ -1,4 +1,8 @@ % -yield +def foo + yield +end % -yield # comment +def foo + yield # comment +end diff --git a/test/formatting_test.rb b/test/formatting_test.rb index eff7ef71..5e5f9e9f 100644 --- a/test/formatting_test.rb +++ b/test/formatting_test.rb @@ -7,6 +7,7 @@ class FormattingTest < Minitest::Test Fixtures.each_fixture do |fixture| define_method(:"test_formatted_#{fixture.name}") do assert_equal(fixture.formatted, SyntaxTree.format(fixture.source)) + assert_syntax_tree(SyntaxTree.parse(fixture.source)) end end @@ -27,5 +28,37 @@ def test_stree_ignore assert_equal(source, SyntaxTree.format(source)) end + + def test_formatting_with_different_indentation_level + source = <<~SOURCE + def foo + puts "a" + end + SOURCE + + # Default indentation + assert_equal(source, SyntaxTree.format(source)) + + # Level 2 + assert_equal(<<-EXPECTED.chomp, SyntaxTree.format(source, 80, 2).rstrip) + def foo + puts "a" + end + EXPECTED + + # Level 4 + assert_equal(<<-EXPECTED.chomp, SyntaxTree.format(source, 80, 4).rstrip) + def foo + puts "a" + end + EXPECTED + + # Level 6 + assert_equal(<<-EXPECTED.chomp, SyntaxTree.format(source, 80, 6).rstrip) + def foo + puts "a" + end + EXPECTED + end end end diff --git a/test/index_test.rb b/test/index_test.rb new file mode 100644 index 00000000..1e2a7fc7 --- /dev/null +++ b/test/index_test.rb @@ -0,0 +1,183 @@ +# frozen_string_literal: true + +require_relative "test_helper" + 
+module SyntaxTree + class IndexTest < Minitest::Test + def test_module + index_each("module Foo; end") do |entry| + assert_equal :Foo, entry.name + assert_equal [[:Foo]], entry.nesting + end + end + + def test_module_nested + index_each("module Foo; module Bar; end; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [[:Foo], [:Bar]], entry.nesting + end + end + + def test_module_comments + index_each("# comment1\n# comment2\nmodule Foo; end") do |entry| + assert_equal :Foo, entry.name + assert_equal ["# comment1", "# comment2"], entry.comments.to_a + end + end + + def test_class + index_each("class Foo; end") do |entry| + assert_equal :Foo, entry.name + assert_equal [[:Foo]], entry.nesting + end + end + + def test_class_paths_2 + index_each("class Foo::Bar; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [%i[Foo Bar]], entry.nesting + end + end + + def test_class_paths_3 + index_each("class Foo::Bar::Baz; end") do |entry| + assert_equal :Baz, entry.name + assert_equal [%i[Foo Bar Baz]], entry.nesting + end + end + + def test_class_nested + index_each("class Foo; class Bar; end; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [[:Foo], [:Bar]], entry.nesting + end + end + + def test_class_paths_nested + index_each("class Foo; class Bar::Baz::Qux; end; end") do |entry| + assert_equal :Qux, entry.name + assert_equal [[:Foo], %i[Bar Baz Qux]], entry.nesting + end + end + + def test_class_superclass + index_each("class Foo < Bar; end") do |entry| + assert_equal :Foo, entry.name + assert_equal [[:Foo]], entry.nesting + assert_equal [:Bar], entry.superclass + end + end + + def test_class_path_superclass + index_each("class Foo::Bar < Baz::Qux; end") do |entry| + assert_equal :Bar, entry.name + assert_equal [%i[Foo Bar]], entry.nesting + assert_equal %i[Baz Qux], entry.superclass + end + end + + def test_class_comments + index_each("# comment1\n# comment2\nclass Foo; end") do |entry| + assert_equal :Foo, entry.name + 
assert_equal ["# comment1", "# comment2"], entry.comments.to_a + end + end + + def test_method + index_each("def foo; end") do |entry| + assert_equal :foo, entry.name + assert_empty entry.nesting + end + end + + def test_method_nested + index_each("class Foo; def foo; end; end") do |entry| + assert_equal :foo, entry.name + assert_equal [[:Foo]], entry.nesting + end + end + + def test_method_comments + index_each("# comment1\n# comment2\ndef foo; end") do |entry| + assert_equal :foo, entry.name + assert_equal ["# comment1", "# comment2"], entry.comments.to_a + end + end + + def test_singleton_method + index_each("def self.foo; end") do |entry| + assert_equal :foo, entry.name + assert_empty entry.nesting + end + end + + def test_singleton_method_nested + index_each("class Foo; def self.foo; end; end") do |entry| + assert_equal :foo, entry.name + assert_equal [[:Foo]], entry.nesting + end + end + + def test_singleton_method_comments + index_each("# comment1\n# comment2\ndef self.foo; end") do |entry| + assert_equal :foo, entry.name + assert_equal ["# comment1", "# comment2"], entry.comments.to_a + end + end + + def test_alias_method + index_each("alias foo bar") do |entry| + assert_equal :foo, entry.name + assert_empty entry.nesting + end + end + + def test_attr_reader + index_each("attr_reader :foo") do |entry| + assert_equal :foo, entry.name + assert_empty entry.nesting + end + end + + def test_attr_writer + index_each("attr_writer :foo") do |entry| + assert_equal :foo=, entry.name + assert_empty entry.nesting + end + end + + def test_attr_accessor + index_each("attr_accessor :foo") do |entry| + assert_equal :foo=, entry.name + assert_empty entry.nesting + end + end + + def test_constant + index_each("FOO = 1") do |entry| + assert_equal :FOO, entry.name + assert_empty entry.nesting + end + end + + def test_this_file + entries = Index.index_file(__FILE__, backend: Index::ParserBackend.new) + + if defined?(RubyVM::InstructionSequence) + entries += 
Index.index_file(__FILE__, backend: Index::ISeqBackend.new) + end + + entries.map { |entry| entry.comments.to_a } + end + + private + + def index_each(source) + yield Index.index(source, backend: Index::ParserBackend.new).last + + if defined?(RubyVM::InstructionSequence) + yield Index.index(source, backend: Index::ISeqBackend.new).last + end + end + end +end diff --git a/test/interface_test.rb b/test/interface_test.rb deleted file mode 100644 index 5086680e..00000000 --- a/test/interface_test.rb +++ /dev/null @@ -1,72 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module SyntaxTree - class InterfaceTest < Minitest::Test - ObjectSpace.each_object(Node.singleton_class) do |klass| - next if klass == Node - - define_method(:"test_instantiate_#{klass.name}") do - assert_syntax_tree(instantiate(klass)) - end - end - - Fixtures.each_fixture do |fixture| - define_method(:"test_#{fixture.name}") do - assert_syntax_tree(SyntaxTree.parse(fixture.source)) - end - end - - private - - # This method is supposed to instantiate a new instance of the given class. - # The class is always a descendant from SyntaxTree::Node, so we can make - # certain assumptions about the way the initialize method is set up. If it - # needs to be special-cased, it's done so at the end of this method. - def instantiate(klass) - params = {} - - # Set up all of the keyword parameters for the class. - klass - .instance_method(:initialize) - .parameters - .each { |(type, name)| params[name] = nil if type.start_with?("key") } - - # Set up any default values that have to be arrays. - %i[ - assocs - comments - elements - keywords - locals - optionals - parts - posts - requireds - symbols - values - ].each { |key| params[key] = [] if params.key?(key) } - - # Set up a default location for the node. 
- params[:location] = Location.fixed(line: 0, char: 0, column: 0) - - case klass.name - when "SyntaxTree::Binary" - klass.new(**params, operator: :+) - when "SyntaxTree::Kw" - klass.new(**params, value: "kw") - when "SyntaxTree::Label" - klass.new(**params, value: "label:") - when "SyntaxTree::Op" - klass.new(**params, value: "+") - when "SyntaxTree::RegexpLiteral" - klass.new(**params, ending: "/") - when "SyntaxTree::Statements" - klass.new(nil, **params, body: []) - else - klass.new(**params) - end - end - end -end diff --git a/test/language_server_test.rb b/test/language_server_test.rb index 2fe4e60a..54455c95 100644 --- a/test/language_server_test.rb +++ b/test/language_server_test.rb @@ -6,19 +6,38 @@ module SyntaxTree # stree-ignore class LanguageServerTest < Minitest::Test - class Initialize < Struct.new(:id) + class Initialize + attr_reader :id + + def initialize(id) + @id = id + end + def to_hash { method: "initialize", id: id } end end - class Shutdown < Struct.new(:id) + class Shutdown + attr_reader :id + + def initialize(id) + @id = id + end + def to_hash { method: "shutdown", id: id } end end - class TextDocumentDidOpen < Struct.new(:uri, :text) + class TextDocumentDidOpen + attr_reader :uri, :text + + def initialize(uri, text) + @uri = uri + @text = text + end + def to_hash { method: "textDocument/didOpen", @@ -27,7 +46,14 @@ def to_hash end end - class TextDocumentDidChange < Struct.new(:uri, :text) + class TextDocumentDidChange + attr_reader :uri, :text + + def initialize(uri, text) + @uri = uri + @text = text + end + def to_hash { method: "textDocument/didChange", @@ -39,7 +65,13 @@ def to_hash end end - class TextDocumentDidClose < Struct.new(:uri) + class TextDocumentDidClose + attr_reader :uri + + def initialize(uri) + @uri = uri + end + def to_hash { method: "textDocument/didClose", @@ -48,7 +80,14 @@ def to_hash end end - class TextDocumentFormatting < Struct.new(:id, :uri) + class TextDocumentFormatting + attr_reader :id, :uri + + def 
initialize(id, uri) + @id = id + @uri = uri + end + def to_hash { method: "textDocument/formatting", @@ -58,7 +97,14 @@ def to_hash end end - class TextDocumentInlayHint < Struct.new(:id, :uri) + class TextDocumentInlayHint + attr_reader :id, :uri + + def initialize(id, uri) + @id = id + @uri = uri + end + def to_hash { method: "textDocument/inlayHint", @@ -68,7 +114,14 @@ def to_hash end end - class SyntaxTreeVisualizing < Struct.new(:id, :uri) + class SyntaxTreeVisualizing + attr_reader :id, :uri + + def initialize(id, uri) + @id = id + @uri = uri + end + def to_hash { method: "syntaxTree/visualizing", @@ -98,6 +151,24 @@ def test_formatting assert_equal("class Bar\nend\n", responses.dig(1, :result, 0, :newText)) end + def test_formatting_ignore + responses = run_server([ + Initialize.new(1), + TextDocumentDidOpen.new("file:///path/to/file.rb", "class Foo; end"), + TextDocumentFormatting.new(2, "file:///path/to/file.rb"), + Shutdown.new(3) + ], ignore_files: ["path/**/*.rb"]) + + shape = LanguageServer::Request[[ + { id: 1, result: { capabilities: Hash } }, + { id: 2, result: :any }, + { id: 3, result: {} } + ]] + + assert_operator(shape, :===, responses) + assert_nil(responses.dig(1, :result)) + end + def test_formatting_failure responses = run_server([ Initialize.new(1), @@ -269,14 +340,15 @@ def read(content) end end - def run_server(messages, print_width: DEFAULT_PRINT_WIDTH) + def run_server(messages, print_width: DEFAULT_PRINT_WIDTH, ignore_files: []) input = StringIO.new(messages.map { |message| write(message) }.join) output = StringIO.new LanguageServer.new( input: input, output: output, - print_width: print_width + print_width: print_width, + ignore_files: ignore_files ).run read(output.tap(&:rewind)) diff --git a/test/node_test.rb b/test/node_test.rb index 3d700e73..f2706b2c 100644 --- a/test/node_test.rb +++ b/test/node_test.rb @@ -60,7 +60,7 @@ def test_arg_paren_heredoc ARGUMENT SOURCE - at = location(lines: 1..3, chars: 6..28) + at = location(lines: 
1..3, chars: 6..37) assert_node(ArgParen, source, at: at, &:arguments) end @@ -131,7 +131,7 @@ def test_aryptn end SOURCE - at = location(lines: 2..2, chars: 18..47) + at = location(lines: 2..2, chars: 18..48) assert_node(AryPtn, source, at: at) { |node| node.consequent.pattern } end @@ -280,7 +280,10 @@ def test_brace_block end def test_break - assert_node(Break, "break value") + at = location(chars: 6..17) + assert_node(Break, "tap { break value }", at: at) do |node| + node.block.bodystmt.body.first + end end def test_call @@ -533,7 +536,7 @@ def test_heredoc HEREDOC SOURCE - at = location(lines: 1..3, chars: 0..22) + at = location(lines: 1..3, chars: 0..30) assert_node(Heredoc, source, at: at) end @@ -544,7 +547,7 @@ def test_heredoc_beg HEREDOC SOURCE - at = location(chars: 0..11) + at = location(chars: 0..10) assert_node(HeredocBeg, source, at: at, &:beginning) end @@ -555,7 +558,7 @@ def test_heredoc_end HEREDOC SOURCE - at = location(lines: 3..3, chars: 22..31, columns: 0..9) + at = location(lines: 3..3, chars: 22..30, columns: 0..8) assert_node(HeredocEnd, source, at: at, &:ending) end @@ -710,7 +713,10 @@ def test_mrhs_add_star end def test_next - assert_node(Next, "next(value)") + at = location(chars: 6..17) + assert_node(Next, "tap { next(value) }", at: at) do |node| + node.block.bodystmt.body.first + end end def test_op @@ -786,7 +792,9 @@ def test_rational end def test_redo - assert_node(Redo, "redo") + assert_node(Redo, "tap { redo }", at: location(chars: 6..10)) do |node| + node.block.bodystmt.body.first + end end def test_regexp_literal @@ -833,7 +841,10 @@ def test_rest_param end def test_retry - assert_node(Retry, "retry") + at = location(chars: 15..20) + assert_node(Retry, "begin; rescue; retry; end", at: at) do |node| + node.bodystmt.rescue_clause.statements.body.first + end end def test_return @@ -949,8 +960,8 @@ def test_var_field guard_version("3.1.0") do def test_pinned_var_ref - source = "foo in ^bar" - at = location(chars: 8..11) + source 
= "bar = 1; foo in ^bar" + at = location(chars: 16..20) assert_node(PinnedVarRef, source, at: at, &:pattern) end @@ -1008,16 +1019,22 @@ def test_xstring_heredoc HEREDOC SOURCE - at = location(lines: 1..3, chars: 0..18) + at = location(lines: 1..3, chars: 0..26) assert_node(Heredoc, source, at: at) end def test_yield - assert_node(YieldNode, "yield value") + at = location(lines: 2..2, chars: 10..21) + assert_node(YieldNode, "def foo\n yield value\nend\n", at: at) do |node| + node.bodystmt.statements.body.first + end end def test_yield0 - assert_node(YieldNode, "yield") + at = location(lines: 2..2, chars: 10..15) + assert_node(YieldNode, "def foo\n yield\nend\n", at: at) do |node| + node.bodystmt.statements.body.first + end end def test_zsuper @@ -1058,6 +1075,342 @@ def test_root_class_raises_not_implemented_errors end end + def test_arity_no_args + source = <<~SOURCE + def foo + end + SOURCE + + at = location(chars: 0..11, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(0..0, node.arity) + node + end + end + + def test_arity_positionals + source = <<~SOURCE + def foo(a, b = 1) + end + SOURCE + + at = location(chars: 0..21, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1..2, node.arity) + node + end + end + + def test_arity_rest + source = <<~SOURCE + def foo(a, *b) + end + SOURCE + + at = location(chars: 0..18, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1.., node.arity) + node + end + end + + def test_arity_keyword_rest + source = <<~SOURCE + def foo(a, **b) + end + SOURCE + + at = location(chars: 0..19, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1.., node.arity) + node + end + end + + def test_arity_keywords + source = <<~SOURCE + def foo(a:, b: 1) + end + SOURCE + + at = location(chars: 0..21, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + 
assert_equal(1..2, node.arity) + node + end + end + + def test_arity_mixed + source = <<~SOURCE + def foo(a, b = 1, c:, d: 2) + end + SOURCE + + at = location(chars: 0..31, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(2..4, node.arity) + node + end + end + + guard_version("2.7.3") do + def test_arity_arg_forward + source = <<~SOURCE + def foo(...) + end + SOURCE + + at = location(chars: 0..16, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(0.., node.arity) + node + end + end + end + + guard_version("3.0.0") do + def test_arity_positional_and_arg_forward + source = <<~SOURCE + def foo(a, ...) + end + SOURCE + + at = location(chars: 0..19, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1.., node.arity) + node + end + end + end + + def test_arity_no_parenthesis + source = <<~SOURCE + def foo a, b = 1 + end + SOURCE + + at = location(chars: 0..20, columns: 0..3, lines: 1..2) + assert_node(DefNode, source, at: at) do |node| + assert_equal(1..2, node.arity) + node + end + end + + def test_block_arity_positionals + source = <<~SOURCE + [].each do |a, b, c| + end + SOURCE + + at = location(chars: 8..24, columns: 8..3, lines: 1..2) + assert_node(BlockNode, source, at: at) do |node| + block = node.block + assert_equal(3..3, block.arity) + block + end + end + + def test_block_arity_with_optional + source = <<~SOURCE + [].each do |a, b = 1| + end + SOURCE + + at = location(chars: 8..25, columns: 8..3, lines: 1..2) + assert_node(BlockNode, source, at: at) do |node| + block = node.block + assert_equal(1..2, block.arity) + block + end + end + + def test_block_arity_with_optional_keyword + source = <<~SOURCE + [].each do |a, b: 2| + end + SOURCE + + at = location(chars: 8..24, columns: 8..3, lines: 1..2) + assert_node(BlockNode, source, at: at) do |node| + block = node.block + assert_equal(1..2, block.arity) + block + end + end + + def 
test_call_node_arity_positional_arguments + source = <<~SOURCE + foo(1, 2, 3) + SOURCE + + at = location(chars: 0..12, columns: 0..3, lines: 1..1) + assert_node(CallNode, source, at: at) do |node| + assert_equal(3, node.arity) + node + end + end + + def test_call_node_arity_keyword_arguments + source = <<~SOURCE + foo(bar, something: 123) + SOURCE + + at = location(chars: 0..24, columns: 0..24, lines: 1..1) + assert_node(CallNode, source, at: at) do |node| + assert_equal(2, node.arity) + node + end + end + + def test_call_node_arity_splat_arguments + source = <<~SOURCE + foo(*bar) + SOURCE + + at = location(chars: 0..9, columns: 0..9, lines: 1..1) + assert_node(CallNode, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_call_node_arity_keyword_rest_arguments + source = <<~SOURCE + foo(**bar) + SOURCE + + at = location(chars: 0..10, columns: 0..10, lines: 1..1) + assert_node(CallNode, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + guard_version("2.7.3") do + def test_call_node_arity_arg_forward_arguments + source = <<~SOURCE + def foo(...) + bar(...) 
+ end + SOURCE + + at = location(chars: 15..23, columns: 2..10, lines: 2..2) + assert_node(CallNode, source, at: at) do |node| + call = node.bodystmt.statements.body.first + assert_equal(Float::INFINITY, call.arity) + call + end + end + end + + def test_command_arity_positional_arguments + source = <<~SOURCE + foo 1, 2, 3 + SOURCE + + at = location(chars: 0..11, columns: 0..3, lines: 1..1) + assert_node(Command, source, at: at) do |node| + assert_equal(3, node.arity) + node + end + end + + def test_command_arity_keyword_arguments + source = <<~SOURCE + foo bar, something: 123 + SOURCE + + at = location(chars: 0..23, columns: 0..23, lines: 1..1) + assert_node(Command, source, at: at) do |node| + assert_equal(2, node.arity) + node + end + end + + def test_command_arity_splat_arguments + source = <<~SOURCE + foo *bar + SOURCE + + at = location(chars: 0..8, columns: 0..8, lines: 1..1) + assert_node(Command, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_command_arity_keyword_rest_arguments + source = <<~SOURCE + foo **bar + SOURCE + + at = location(chars: 0..9, columns: 0..9, lines: 1..1) + assert_node(Command, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_command_call_arity_positional_arguments + source = <<~SOURCE + object.foo 1, 2, 3 + SOURCE + + at = location(chars: 0..18, columns: 0..3, lines: 1..1) + assert_node(CommandCall, source, at: at) do |node| + assert_equal(3, node.arity) + node + end + end + + def test_command_call_arity_keyword_arguments + source = <<~SOURCE + object.foo bar, something: 123 + SOURCE + + at = location(chars: 0..30, columns: 0..30, lines: 1..1) + assert_node(CommandCall, source, at: at) do |node| + assert_equal(2, node.arity) + node + end + end + + def test_command_call_arity_splat_arguments + source = <<~SOURCE + object.foo *bar + SOURCE + + at = location(chars: 0..15, columns: 0..15, lines: 1..1) + assert_node(CommandCall, 
source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_command_call_arity_keyword_rest_arguments + source = <<~SOURCE + object.foo **bar + SOURCE + + at = location(chars: 0..16, columns: 0..16, lines: 1..1) + assert_node(CommandCall, source, at: at) do |node| + assert_equal(Float::INFINITY, node.arity) + node + end + end + + def test_vcall_arity + source = <<~SOURCE + foo + SOURCE + + at = location(chars: 0..3, columns: 0..3, lines: 1..1) + assert_node(VCall, source, at: at) do |node| + assert_equal(0, node.arity) + node + end + end + private def location(lines: 1..1, chars: 0..0, columns: 0..0) diff --git a/test/parser_test.rb b/test/parser_test.rb index 6048cf11..169d5b46 100644 --- a/test/parser_test.rb +++ b/test/parser_test.rb @@ -33,7 +33,7 @@ def test_parses_ripper_methods def test_errors_on_missing_token_with_location error = assert_raises(Parser::ParseError) { SyntaxTree.parse("f+\"foo") } - assert_equal(2, error.column) + assert_equal(3, error.column) end def test_errors_on_missing_end_with_location @@ -45,7 +45,7 @@ def test_errors_on_missing_regexp_ending error = assert_raises(Parser::ParseError) { SyntaxTree.parse("a =~ /foo") } - assert_equal(5, error.column) + assert_equal(6, error.column) end def test_errors_on_missing_token_without_location @@ -65,5 +65,62 @@ def foo end RUBY end + + def test_does_not_choke_on_invalid_characters_in_source_string + SyntaxTree.parse(<<~RUBY) + # comment + # comment + __END__ + \xC5 + RUBY + end + + def test_lambda_vars_with_parameters_location + tree = SyntaxTree.parse(<<~RUBY) + # comment + # comment + ->(_i; a) { a } + RUBY + + local_location = + tree.statements.body.last.params.contents.locals.first.location + + assert_equal(3, local_location.start_line) + assert_equal(3, local_location.end_line) + assert_equal(7, local_location.start_column) + assert_equal(8, local_location.end_column) + end + + def test_lambda_vars_location + tree = SyntaxTree.parse(<<~RUBY) + # 
comment + # comment + ->(; a) { a } + RUBY + + local_location = + tree.statements.body.last.params.contents.locals.first.location + + assert_equal(3, local_location.start_line) + assert_equal(3, local_location.end_line) + assert_equal(5, local_location.start_column) + assert_equal(6, local_location.end_column) + end + + def test_multiple_lambda_vars_location + tree = SyntaxTree.parse(<<~RUBY) + # comment + # comment + ->(; a, b, c) { a } + RUBY + + local_location = + tree.statements.body.last.params.contents.locals.last.location + + assert_equal(3, local_location.start_line) + assert_equal(3, local_location.end_line) + assert_equal(11, local_location.start_column) + assert_equal(12, local_location.end_column) + end end end diff --git a/test/plugin/disable_auto_ternary_test.rb b/test/plugin/disable_auto_ternary_test.rb new file mode 100644 index 00000000..b2af9d35 --- /dev/null +++ b/test/plugin/disable_auto_ternary_test.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +module SyntaxTree + class DisableTernaryTest < Minitest::Test + def test_short_if_else_unchanged + assert_format(<<~RUBY) + if true + 1 + else + 2 + end + RUBY + end + + def test_short_ternary_unchanged + assert_format("true ? 
1 : 2\n") + end + + private + + def assert_format(expected, source = expected) + options = Formatter::Options.new(disable_auto_ternary: true) + formatter = Formatter.new(source, [], options: options) + SyntaxTree.parse(source).format(formatter) + + formatter.flush + assert_equal(expected, formatter.output.join) + end + end +end diff --git a/test/plugin/single_quotes_test.rb b/test/plugin/single_quotes_test.rb index 6ce10448..b1359ac7 100644 --- a/test/plugin/single_quotes_test.rb +++ b/test/plugin/single_quotes_test.rb @@ -8,6 +8,14 @@ def test_empty_string_literal assert_format("''\n", "\"\"") end + def test_character_literal_with_double_quote + assert_format("'\"'\n", "?\"") + end + + def test_character_literal_with_singlee_quote + assert_format("'\\''\n", "?'") + end + def test_string_literal assert_format("'string'\n", "\"string\"") end diff --git a/test/ractor_test.rb b/test/ractor_test.rb index bcdb2a51..7e0201ca 100644 --- a/test/ractor_test.rb +++ b/test/ractor_test.rb @@ -33,7 +33,7 @@ def test_formatting private def filepaths - Dir.glob(File.expand_path("../lib/syntax_tree/{node,parser}.rb", __dir__)) + Dir.glob(File.expand_path("../lib/syntax_tree/plugin/*.rb", __dir__)) end # Ractors still warn about usage, so I'm disabling that warning here just to diff --git a/test/syntax_tree_test.rb b/test/syntax_tree_test.rb index 05242d94..27aa6851 100644 --- a/test/syntax_tree_test.rb +++ b/test/syntax_tree_test.rb @@ -22,13 +22,18 @@ def method # comment SOURCE bodystmt = SyntaxTree.parse(source).statements.body.first.bodystmt - assert_equal(20, bodystmt.location.start_char) + assert_equal(20, bodystmt.start_char) end def test_parse_error assert_raises(Parser::ParseError) { SyntaxTree.parse("<>") } end + def test_marshalable + node = SyntaxTree.parse("1 + 2") + assert_operator(node, :===, Marshal.load(Marshal.dump(node))) + end + def test_maxwidth_format assert_equal("foo +\n bar\n", SyntaxTree.format("foo + bar", 5)) end diff --git a/test/test_helper.rb 
b/test/test_helper.rb index 77627e26..787f819d 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,16 +1,53 @@ # frozen_string_literal: true -require "simplecov" -SimpleCov.start do - add_filter("idempotency_test.rb") unless ENV["CI"] - add_group("lib", "lib") - add_group("test", "test") +unless RUBY_ENGINE == "truffleruby" + require "simplecov" + SimpleCov.start do + add_filter("idempotency_test.rb") unless ENV["CI"] + add_group("lib", "lib") + add_group("test", "test") + end end $LOAD_PATH.unshift(File.expand_path("../lib", __dir__)) require "syntax_tree" require "syntax_tree/cli" +unless RUBY_ENGINE == "truffleruby" + # Here we are going to establish type verification whenever a new node is + # created. We do this through the reflection module, which in turn parses the + # source code of the node classes. + require "syntax_tree/reflection" + SyntaxTree::Reflection.nodes.each do |name, node| + next if name == :Statements + + clazz = SyntaxTree.const_get(name) + parameters = clazz.instance_method(:initialize).parameters + + # First, verify that all of the parameters listed in the list of attributes. + # If there are any parameters that aren't listed in the attributes, then + # something went wrong with the parsing in the reflection module. + raise unless (parameters.map(&:last) - node.attributes.keys).empty? + + # Now we're going to use an alias chain to redefine the initialize method to + # include type checking. 
+ clazz.alias_method(:initialize_without_verify, :initialize) + clazz.define_method(:initialize) do |**kwargs| + kwargs.each do |kwarg, value| + attribute = node.attributes.fetch(kwarg) + + unless attribute.type === value + raise TypeError, + "invalid type for #{name}##{kwarg}, expected " \ + "#{attribute.type.inspect}, got #{value.inspect}" + end + end + + initialize_without_verify(**kwargs) + end + end +end + require "json" require "tempfile" require "pp" @@ -61,7 +98,7 @@ def assert_syntax_tree(node) assert_includes(pretty, type) # Assert that we can get back a new tree by using the mutation visitor. - assert_operator node, :===, node.accept(Visitor::MutationVisitor.new) + assert_operator node, :===, node.accept(MutationVisitor.new) # Serialize the node to JSON, parse it back out, and assert that we have # found the expected type. @@ -69,17 +106,15 @@ def assert_syntax_tree(node) refute_includes(json, "#<") assert_equal(type, JSON.parse(json)["type"]) - if RUBY_ENGINE != "truffleruby" - # Get a match expression from the node, then assert that it can in fact - # match the node. - # rubocop:disable all - assert(eval(<<~RUBY)) - case node - in #{node.construct_keys} - true - end - RUBY - end + # Get a match expression from the node, then assert that it can in fact + # match the node. 
+ # rubocop:disable all + assert(eval(<<~RUBY)) + case node + in #{node.construct_keys} + true + end + RUBY end Minitest::Test.include(self) diff --git a/test/visitor_test.rb b/test/visitor_test.rb index 74f3df75..d9637df0 100644 --- a/test/visitor_test.rb +++ b/test/visitor_test.rb @@ -30,13 +30,15 @@ def initialize @visited_nodes = [] end - visit_method def visit_class(node) - @visited_nodes << node.constant.constant.value - super - end + visit_methods do + def visit_class(node) + @visited_nodes << node.constant.constant.value + super + end - visit_method def visit_def(node) - @visited_nodes << node.name.value + def visit_def(node) + @visited_nodes << node.name.value + end end end @@ -53,5 +55,19 @@ def test_visit_method_correction assert_match(/visit_binary/, message) end end + + class VisitMethodsTestVisitor < BasicVisitor + end + + def test_visit_methods + VisitMethodsTestVisitor.visit_methods do + assert_raises(BasicVisitor::VisitMethodError) do + # In reality, this would be a method defined using the def keyword, + # but we're using method_added here to trigger the checker so that we + # aren't defining methods dynamically in the test suite. 
+ VisitMethodsTestVisitor.method_added(:visit_foo) + end + end + end end end diff --git a/test/visitor_with_environment_test.rb b/test/visitor_with_environment_test.rb deleted file mode 100644 index cc4007fe..00000000 --- a/test/visitor_with_environment_test.rb +++ /dev/null @@ -1,659 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -module SyntaxTree - class VisitorWithEnvironmentTest < Minitest::Test - class Collector < Visitor - include WithEnvironment - - attr_reader :variables, :arguments - - def initialize - @variables = {} - @arguments = {} - end - - def visit_ident(node) - local = current_environment.find_local(node.value) - return unless local - - value = node.value.delete_suffix(":") - - case local.type - when :argument - @arguments[value] = local - when :variable - @variables[value] = local - end - end - - def visit_label(node) - value = node.value.delete_suffix(":") - local = current_environment.find_local(value) - return unless local - - @arguments[value] = node if local.type == :argument - end - end - - def test_collecting_simple_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = 1 - a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_aref_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = [] - a[1] - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_multi_assign_variables - 
tree = SyntaxTree.parse(<<~RUBY) - def foo - a, b = [1, 2] - puts a - puts b - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(2, visitor.variables.length) - - variable_a = visitor.variables["a"] - assert_equal(1, variable_a.definitions.length) - assert_equal(1, variable_a.usages.length) - - assert_equal(2, variable_a.definitions[0].start_line) - assert_equal(3, variable_a.usages[0].start_line) - - variable_b = visitor.variables["b"] - assert_equal(1, variable_b.definitions.length) - assert_equal(1, variable_b.usages.length) - - assert_equal(2, variable_b.definitions[0].start_line) - assert_equal(4, variable_b.usages[0].start_line) - end - - def test_collecting_pattern_matching_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - case [1, 2] - in Integer => a, Integer - puts a - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - # There are two occurrences, one on line 3 for pinning and one on line 4 - # for reference - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - - # Assignment a - assert_equal(3, variable.definitions[0].start_line) - assert_equal(4, variable.usages[0].start_line) - end - - def test_collecting_pinned_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - a = 18 - case [1, 2] - in ^a, *rest - puts a - puts rest - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(2, visitor.variables.length) - - variable_a = visitor.variables["a"] - assert_equal(2, variable_a.definitions.length) - assert_equal(1, variable_a.usages.length) - - assert_equal(2, variable_a.definitions[0].start_line) - assert_equal(4, variable_a.definitions[1].start_line) - assert_equal(5, variable_a.usages[0].start_line) - - variable_rest = visitor.variables["rest"] - assert_equal(1, variable_rest.definitions.length) - assert_equal(4, variable_rest.definitions[0].start_line) - - # Rest is considered a vcall by the parser instead of a var_ref - # assert_equal(1, 
variable_rest.usages.length) - # assert_equal(6, variable_rest.usages[0].start_line) - end - - if RUBY_VERSION >= "3.1" - def test_collecting_one_line_pattern_matching_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo - [1] => a - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(2, variable.definitions[0].start_line) - assert_equal(3, variable.usages[0].start_line) - end - - def test_collecting_endless_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) = puts a - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(1, argument.usages[0].start_line) - end - end - - def test_collecting_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_singleton_method_arguments - tree = SyntaxTree.parse(<<~RUBY) - def self.foo(a) - puts a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_method_arguments_all_types 
- tree = SyntaxTree.parse(<<~RUBY) - def foo(a, b = 1, *c, d, e: 1, **f, &block) - puts a - puts b - puts c - puts d - puts e - puts f - block.call - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(7, visitor.arguments.length) - - argument_a = visitor.arguments["a"] - assert_equal(1, argument_a.definitions.length) - assert_equal(1, argument_a.usages.length) - assert_equal(1, argument_a.definitions[0].start_line) - assert_equal(2, argument_a.usages[0].start_line) - - argument_b = visitor.arguments["b"] - assert_equal(1, argument_b.definitions.length) - assert_equal(1, argument_b.usages.length) - assert_equal(1, argument_b.definitions[0].start_line) - assert_equal(3, argument_b.usages[0].start_line) - - argument_c = visitor.arguments["c"] - assert_equal(1, argument_c.definitions.length) - assert_equal(1, argument_c.usages.length) - assert_equal(1, argument_c.definitions[0].start_line) - assert_equal(4, argument_c.usages[0].start_line) - - argument_d = visitor.arguments["d"] - assert_equal(1, argument_d.definitions.length) - assert_equal(1, argument_d.usages.length) - assert_equal(1, argument_d.definitions[0].start_line) - assert_equal(5, argument_d.usages[0].start_line) - - argument_e = visitor.arguments["e"] - assert_equal(1, argument_e.definitions.length) - assert_equal(1, argument_e.usages.length) - assert_equal(1, argument_e.definitions[0].start_line) - assert_equal(6, argument_e.usages[0].start_line) - - argument_f = visitor.arguments["f"] - assert_equal(1, argument_f.definitions.length) - assert_equal(1, argument_f.usages.length) - assert_equal(1, argument_f.definitions[0].start_line) - assert_equal(7, argument_f.usages[0].start_line) - - argument_block = visitor.arguments["block"] - assert_equal(1, argument_block.definitions.length) - assert_equal(1, argument_block.usages.length) - assert_equal(1, argument_block.definitions[0].start_line) - assert_equal(8, argument_block.usages[0].start_line) - end - - def 
test_collecting_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - [].each do |i| - puts i - end - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(2, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - end - - def test_collecting_one_line_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - [].each { |i| puts i } - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(2, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_collecting_shadowed_block_arguments - tree = SyntaxTree.parse(<<~RUBY) - def foo - i = "something" - - [].each do |i| - puts i - end - - i - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - argument = visitor.arguments["i"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - assert_equal(4, argument.definitions[0].start_line) - assert_equal(5, argument.usages[0].start_line) - - variable = visitor.variables["i"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - assert_equal(2, variable.definitions[0].start_line) - assert_equal(8, variable.usages[0].start_line) - end - - def test_collecting_shadowed_local_variables - tree = SyntaxTree.parse(<<~RUBY) - def foo(a) - puts a - a = 123 - a - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - # All occurrences are considered arguments, despite overriding the - # argument value - assert_equal(1, 
visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["a"] - assert_equal(2, argument.definitions.length) - assert_equal(2, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.definitions[1].start_line) - assert_equal(2, argument.usages[0].start_line) - assert_equal(4, argument.usages[1].start_line) - end - - def test_variables_in_the_top_level - tree = SyntaxTree.parse(<<~RUBY) - a = 123 - a - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["a"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_field - tree = SyntaxTree.parse(<<~RUBY) - object = {} - object["name"] = "something" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_on_a_method_call - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object.attributes["name"] = "something" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_with_two_accesses - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new 
- object["first"]["second"] ||= [] - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_aref_on_a_method_call_with_arguments - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object.instance_variable_get(:@attributes)[:something] = :other_thing - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(0, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - end - - def test_double_aref_on_method_call - tree = SyntaxTree.parse(<<~RUBY) - object = MyObject.new - object["attributes"].find { |a| a["field"] == "expected" }["value"] = "changed" - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(1, visitor.arguments.length) - assert_equal(1, visitor.variables.length) - - variable = visitor.variables["object"] - assert_equal(1, variable.definitions.length) - assert_equal(1, variable.usages.length) - - assert_equal(1, variable.definitions[0].start_line) - assert_equal(2, variable.usages[0].start_line) - - argument = visitor.arguments["a"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(2, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - end - - def test_nested_arguments - tree = SyntaxTree.parse(<<~RUBY) - [[1, [2, 3]]].each do |one, (two, three)| - one - two - three - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - 
assert_equal(3, visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["one"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - - argument = visitor.arguments["two"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - - argument = visitor.arguments["three"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(4, argument.usages[0].start_line) - end - - def test_double_nested_arguments - tree = SyntaxTree.parse(<<~RUBY) - [[1, [2, 3]]].each do |one, (two, (three, four))| - one - two - three - four - end - RUBY - - visitor = Collector.new - visitor.visit(tree) - - assert_equal(4, visitor.arguments.length) - assert_equal(0, visitor.variables.length) - - argument = visitor.arguments["one"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(2, argument.usages[0].start_line) - - argument = visitor.arguments["two"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(3, argument.usages[0].start_line) - - argument = visitor.arguments["three"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - assert_equal(4, argument.usages[0].start_line) - - argument = visitor.arguments["four"] - assert_equal(1, argument.definitions.length) - assert_equal(1, argument.usages.length) - - assert_equal(1, argument.definitions[0].start_line) - 
assert_equal(5, argument.usages[0].start_line) - end - - class Resolver < Visitor - include WithEnvironment - - attr_reader :locals - - def initialize - @locals = [] - end - - def visit_assign(node) - level = 0 - environment = current_environment - level += 1 until (environment = environment.parent).nil? - - locals << [node.target.value.value, level] - super - end - end - - def test_class - source = <<~RUBY - module Level0 - level0 = 0 - - module Level1 - level1 = 1 - - class Level2 - level2 = 2 - end - end - end - RUBY - - visitor = Resolver.new - SyntaxTree.parse(source).accept(visitor) - - assert_equal [["level0", 0], ["level1", 1], ["level2", 2]], visitor.locals - end - end -end diff --git a/test/with_scope_test.rb b/test/with_scope_test.rb new file mode 100644 index 00000000..6b48d17d --- /dev/null +++ b/test/with_scope_test.rb @@ -0,0 +1,567 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module SyntaxTree + class WithScopeTest < Minitest::Test + class Collector < Visitor + prepend WithScope + + attr_reader :arguments, :variables + + def initialize + @arguments = {} + @variables = {} + end + + def self.collect(source) + new.tap { SyntaxTree.parse(source).accept(_1) } + end + + visit_methods do + def visit_ident(node) + value = node.value.delete_suffix(":") + local = current_scope.find_local(node.value) + + case local&.type + when :argument + arguments[[current_scope.id, value]] = local + when :variable + variables[[current_scope.id, value]] = local + end + end + + def visit_label(node) + value = node.value.delete_suffix(":") + local = current_scope.find_local(value) + + if local&.type == :argument + arguments[[current_scope.id, value]] = node + end + end + + def visit_vcall(node) + local = current_scope.find_local(node.value) + variables[[current_scope.id, value]] = local if local + + super + end + end + end + + def test_collecting_simple_variables + collector = Collector.collect(<<~RUBY) + def foo + a = 1 + a + end + RUBY + + 
assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_aref_variables + collector = Collector.collect(<<~RUBY) + def foo + a = [] + a[1] + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_multi_assign_variables + collector = Collector.collect(<<~RUBY) + def foo + a, b = [1, 2] + puts a + puts b + end + RUBY + + assert_equal(2, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + assert_variable(collector, "b", definitions: [2], usages: [4]) + end + + def test_collecting_pattern_matching_variables + collector = Collector.collect(<<~RUBY) + def foo + case [1, 2] + in Integer => a, Integer + puts a + end + end + RUBY + + # There are two occurrences, one on line 3 for pinning and one on line 4 + # for reference + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [3], usages: [4]) + end + + def test_collecting_pinned_variables + collector = Collector.collect(<<~RUBY) + def foo + a = 18 + case [1, 2] + in ^a, *rest + puts a + puts rest + end + end + RUBY + + assert_equal(2, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [4, 5]) + assert_variable(collector, "rest", definitions: [4], usages: [6]) + end + + if RUBY_VERSION >= "3.1" + def test_collecting_one_line_pattern_matching_variables + collector = Collector.collect(<<~RUBY) + def foo + [1] => a + puts a + end + RUBY + + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [2], usages: [3]) + end + + def test_collecting_endless_method_arguments + collector = Collector.collect(<<~RUBY) + def foo(a) = puts a + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [1]) + end + end + + def test_collecting_method_arguments 
+ collector = Collector.collect(<<~RUBY) + def foo(a) + puts a + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + end + + def test_collecting_methods_with_destructured_post_arguments + collector = Collector.collect(<<~RUBY) + def foo(optional = 1, (bin, bag)) + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_argument(collector, "optional", definitions: [1], usages: []) + assert_argument(collector, "bin", definitions: [1], usages: []) + assert_argument(collector, "bag", definitions: [1], usages: []) + end + + def test_collecting_methods_with_desctructured_post_using_splat + collector = Collector.collect(<<~RUBY) + def foo(optional = 1, (bin, bag, *)) + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_argument(collector, "optional", definitions: [1], usages: []) + assert_argument(collector, "bin", definitions: [1], usages: []) + assert_argument(collector, "bag", definitions: [1], usages: []) + end + + def test_collecting_methods_with_nested_desctructured + collector = Collector.collect(<<~RUBY) + def foo(optional = 1, (bin, (bag))) + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_argument(collector, "optional", definitions: [1], usages: []) + assert_argument(collector, "bin", definitions: [1], usages: []) + assert_argument(collector, "bag", definitions: [1], usages: []) + end + + def test_collecting_singleton_method_arguments + collector = Collector.collect(<<~RUBY) + def self.foo(a) + puts a + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", definitions: [1], usages: [2]) + end + + def test_collecting_method_arguments_all_types + collector = Collector.collect(<<~RUBY) + def foo(a, b = 1, *c, d, e: 1, **f, &block) + puts a + puts b + puts c + puts d + puts e + puts f + block.call + end + RUBY + + assert_equal(7, collector.arguments.length) + assert_argument(collector, "a", definitions: 
[1], usages: [2]) + assert_argument(collector, "b", definitions: [1], usages: [3]) + assert_argument(collector, "c", definitions: [1], usages: [4]) + assert_argument(collector, "d", definitions: [1], usages: [5]) + assert_argument(collector, "e", definitions: [1], usages: [6]) + assert_argument(collector, "f", definitions: [1], usages: [7]) + assert_argument(collector, "block", definitions: [1], usages: [8]) + end + + def test_collecting_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + [].each do |i| + puts i + end + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [2], usages: [3]) + end + + def test_collecting_destructured_block_arguments + collector = Collector.collect(<<~RUBY) + [].each do |(a, *b)| + end + RUBY + + assert_equal(2, collector.arguments.length) + assert_argument(collector, "b", definitions: [1]) + end + + def test_collecting_anonymous_destructured_block_arguments + collector = Collector.collect(<<~RUBY) + [].each do |(a, *)| + end + RUBY + + assert_equal(1, collector.arguments.length) + end + + def test_collecting_one_line_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + [].each { |i| puts i } + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [2], usages: [2]) + end + + def test_collecting_shadowed_block_arguments + collector = Collector.collect(<<~RUBY) + def foo + i = "something" + + [].each do |i| + puts i + end + + i + end + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "i", definitions: [4], usages: [5]) + + assert_equal(1, collector.variables.length) + assert_variable(collector, "i", definitions: [2], usages: [8]) + end + + def test_collecting_shadowed_local_variables + collector = Collector.collect(<<~RUBY) + def foo(a) + puts a + a = 123 + a + end + RUBY + + # All occurrences are considered arguments, despite overriding the + # argument value + 
assert_equal(1, collector.arguments.length) + assert_equal(0, collector.variables.length) + assert_argument(collector, "a", definitions: [1, 3], usages: [2, 4]) + end + + def test_variables_in_the_top_level + collector = Collector.collect(<<~RUBY) + a = 123 + a + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "a", definitions: [1], usages: [2]) + end + + def test_aref_field + collector = Collector.collect(<<~RUBY) + object = {} + object["name"] = "something" + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_on_a_method_call + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object.attributes["name"] = "something" + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_with_two_accesses + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object["first"]["second"] ||= [] + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_aref_on_a_method_call_with_arguments + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object.instance_variable_get(:@attributes)[:something] = :other_thing + RUBY + + assert_equal(0, collector.arguments.length) + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_double_aref_on_method_call + collector = Collector.collect(<<~RUBY) + object = MyObject.new + object["attributes"].find { |a| a["field"] == "expected" }["value"] = "changed" + RUBY + + assert_equal(1, collector.arguments.length) + assert_argument(collector, "a", 
definitions: [2], usages: [2]) + + assert_equal(1, collector.variables.length) + assert_variable(collector, "object", definitions: [1], usages: [2]) + end + + def test_nested_arguments + collector = Collector.collect(<<~RUBY) + [[1, [2, 3]]].each do |one, (two, three)| + one + two + three + end + RUBY + + assert_equal(3, collector.arguments.length) + assert_equal(0, collector.variables.length) + + assert_argument(collector, "one", definitions: [1], usages: [2]) + assert_argument(collector, "two", definitions: [1], usages: [3]) + assert_argument(collector, "three", definitions: [1], usages: [4]) + end + + def test_double_nested_arguments + collector = Collector.collect(<<~RUBY) + [[1, [2, 3]]].each do |one, (two, (three, four))| + one + two + three + four + end + RUBY + + assert_equal(4, collector.arguments.length) + assert_equal(0, collector.variables.length) + + assert_argument(collector, "one", definitions: [1], usages: [2]) + assert_argument(collector, "two", definitions: [1], usages: [3]) + assert_argument(collector, "three", definitions: [1], usages: [4]) + assert_argument(collector, "four", definitions: [1], usages: [5]) + end + + def test_block_locals + collector = Collector.collect(<<~RUBY) + [].each do |; a| + end + RUBY + + assert_equal(1, collector.variables.length) + + assert_variable(collector, "a", definitions: [1]) + end + + def test_lambda_locals + collector = Collector.collect(<<~RUBY) + ->(;a) { } + RUBY + + assert_equal(1, collector.variables.length) + + assert_variable(collector, "a", definitions: [1]) + end + + def test_regex_named_capture_groups + collector = Collector.collect(<<~RUBY) + if /(?\\w+)-(?\\w+)/ =~ "something-else" + one + two + end + RUBY + + assert_equal(2, collector.variables.length) + + assert_variable(collector, "one", definitions: [1], usages: [2]) + assert_variable(collector, "two", definitions: [1], usages: [3]) + end + + def test_multiline_regex_named_capture_groups + collector = Collector.collect(<<~RUBY) + if %r{ + 
(?\\w+)- + (?\\w+) + } =~ "something-else" + one + two + end + RUBY + + assert_equal(2, collector.variables.length) + + assert_variable(collector, "one", definitions: [2], usages: [5]) + assert_variable(collector, "two", definitions: [3], usages: [6]) + end + + class Resolver < Visitor + prepend WithScope + + attr_reader :locals + + def initialize + @locals = [] + end + + visit_methods do + def visit_assign(node) + super.tap do + level = 0 + name = node.target.value.value + + scope = current_scope + while !scope.locals.key?(name) && !scope.parent.nil? + level += 1 + scope = scope.parent + end + + locals << [name, level] + end + end + end + end + + def test_resolver + source = <<~RUBY + module Level0 + level0 = 0 + + class Level1 + level1 = 1 + + def level2 + level2 = 2 + + tap do |level3| + level2 = 2 + level3 = 3 + + tap do |level4| + level2 = 2 + level4 = 4 + end + end + end + end + end + RUBY + + resolver = Resolver.new + SyntaxTree.parse(source).accept(resolver) + + expected = [ + ["level0", 0], + ["level1", 0], + ["level2", 0], + ["level2", 1], + ["level3", 0], + ["level2", 2], + ["level4", 0] + ] + + assert_equal expected, resolver.locals + end + + private + + def assert_collected(field, name, definitions: [], usages: []) + keys = field.keys.select { |key| key[1] == name } + assert_equal(1, keys.length) + + variable = field[keys.first] + + assert_equal(definitions.length, variable.definitions.length) + definitions.each_with_index do |definition, index| + assert_equal(definition, variable.definitions[index].start_line) + end + + assert_equal(usages.length, variable.usages.length) + usages.each_with_index do |usage, index| + assert_equal(usage, variable.usages[index].start_line) + end + end + + def assert_argument(collector, name, definitions: [], usages: []) + assert_collected( + collector.arguments, + name, + definitions: definitions, + usages: usages + ) + end + + def assert_variable(collector, name, definitions: [], usages: []) + assert_collected( + 
collector.variables, + name, + definitions: definitions, + usages: usages + ) + end + end +end diff --git a/test/yarv_test.rb b/test/yarv_test.rb deleted file mode 100644 index f8e0ffdb..00000000 --- a/test/yarv_test.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" -require_relative "test_helper" - -module SyntaxTree - class YARVTest < Minitest::Test - CASES = { - "0" => "break 0\n", - "1" => "break 1\n", - "2" => "break 2\n", - "1.0" => "break 1.0\n", - "1 + 2" => "break 1 + 2\n", - "1 - 2" => "break 1 - 2\n", - "1 * 2" => "break 1 * 2\n", - "1 / 2" => "break 1 / 2\n", - "1 % 2" => "break 1 % 2\n", - "1 < 2" => "break 1 < 2\n", - "1 <= 2" => "break 1 <= 2\n", - "1 > 2" => "break 1 > 2\n", - "1 >= 2" => "break 1 >= 2\n", - "1 == 2" => "break 1 == 2\n", - "1 != 2" => "break 1 != 2\n", - "1 & 2" => "break 1 & 2\n", - "1 | 2" => "break 1 | 2\n", - "1 << 2" => "break 1 << 2\n", - "1 >> 2" => "break 1.>>(2)\n", - "1 ** 2" => "break 1.**(2)\n", - "a = 1; a" => "a = 1\nbreak a\n" - }.freeze - - CASES.each do |source, expected| - define_method("test_disassemble_#{source}") do - assert_decompiles(expected, source) - end - end - - def test_bf - hello_world = - "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]" \ - ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++." - - iseq = YARV::Bf.new(hello_world).compile - Formatter.format(hello_world, YARV::Decompiler.new(iseq).to_ruby) - end - - private - - def assert_decompiles(expected, source) - ruby = YARV::Decompiler.new(YARV.compile(source)).to_ruby - actual = Formatter.format(source, ruby) - assert_equal expected, actual - end - end -end