ruby_parser
Advanced tools
Sorry, the diff of this file is too big to display
+17
-3
@@ -94,2 +94,3 @@ #!/usr/bin/env ruby -w | ||
| "keyword_END", "klEND", | ||
| "k_END", "klEND", | ||
| /keyword_(\w+)/, proc { "k#{$1.upcase}" }, | ||
@@ -157,9 +158,18 @@ /\bk_([a-z_]+)/, proc { "k#{$1.upcase}" }, | ||
| '"do (for lambda)"', "kDO_LAMBDA", | ||
| '"do (for block)"', "kDO_BLOCK", | ||
| %("'do' for block"), "kDO_BLOCK", # 3.4 | ||
| %("'do' for lambda"), "kDO_LAMBDA", # 3.4 | ||
| %("'do' for condition"),"kDO_COND", # 3.4 | ||
| %q("#{"), "tSTRING_DBEG", # 3.4 | ||
| '"do (for block)"', "kDO_BLOCK", # 3.4 | ||
| /\"'(\w+)' \(?modifier\)?\"/, proc { |x| "k#{$1.upcase}_MOD" }, # 3.4 | ||
| /\"(\w+) \(?modifier\)?\"/, proc { |x| "k#{$1.upcase}_MOD" }, | ||
| /\"(\w+)\"/, proc { |x| "k#{$1.upcase}" }, | ||
| /\"((?!k)\w+)\"/, proc { |x| "k#{$1.upcase}" }, | ||
| /\$?@(\d+)(\s+|$)/, "", # newer bison | ||
| # 3.4(ish?) changes: | ||
| "option_tNL", "opt_nl", | ||
| "option_terms", "opt_terms", | ||
| # TODO: remove for 3.0 work: | ||
@@ -183,3 +193,3 @@ "lex_ctxt ", "" # 3.0 production that's mostly noise right now | ||
| case line.strip | ||
| case line.strip # TODO: .delete %q["'()] | ||
| when /^$/ then | ||
@@ -190,2 +200,6 @@ when /^(\d+) (\$?[@\w]+): (.*)/ then # yacc | ||
| rules[rule] << munge($3) | ||
| when /^(\d+) (\$?[@\w]+'(?: |\\n)'): (.*)/ then # munges both sides | ||
| rule = $2 | ||
| order << rule unless rules.has_key? rule | ||
| rules[munge(rule)] << munge($3) | ||
| when /^(\d+) \s+\| (.*)/ then # yacc | ||
@@ -192,0 +206,0 @@ rules[rule] << munge($2) |
+35
-5
@@ -41,7 +41,7 @@ # Running the Gauntlet | ||
| % cd RP/gauntlet | ||
| % time caffeinate /Volumes/StuffA/gauntlet/bin/unpack_gems.rb -v [-a] ; say done | ||
| % time caffeinate ./bin/unpack_gems.rb -v [-a] ; say done | ||
| ... waaaait ... | ||
| % DIR=gauntlet.$(today).(all|new).noindex | ||
| % mv hashed.noindex $DIR | ||
| % tar vc -T <(fd -tf . $DIR | sort) | zstd -5 -T0 --long > archives/$DIR.tar.zst ; say done | ||
| % tar vc -T <(fd -tf . $DIR | sort) | zstdmt -12 --long > archives/$DIR.tar.zst ; say done | ||
| % ./bin/sync.sh | ||
@@ -75,3 +75,3 @@ ``` | ||
| ``` | ||
| 9696 % find gauntlet.$(today).noindex -type f | lc | ||
| 9696 % fd -tf . gauntlet.$(today).noindex | wc -l | ||
| 561270 | ||
@@ -90,5 +90,8 @@ 3.5G gauntlet.2021-08-06.noindex | ||
| ``` | ||
| % zstdcat gauntlet.$(today).noindex.tar.zst | tar x | ||
| % tar xf gauntlet.$(today).noindex.tar.zst | ||
| ``` | ||
| (BSD tar (and apparently newer gnu tars) can detect and uncompress | ||
| most compression formats) | ||
| Then, either run a single process (easier to read): | ||
@@ -109,3 +112,30 @@ | ||
| ``` | ||
| % while true ; do clear; fd . -t d -t e gauntlet/*.noindex -X rmdir -p 2> /dev/null ; for D in gauntlet/*.noindex/? ; do echo -n "$D: "; fd .rb $D | wc -l ; done ; echo ; sleep 30 ; done | ||
| % while true ; do clear; fd . -td -te gauntlet/*.noindex -X rmdir -p 2> /dev/null ; for D in gauntlet/*.noindex/? ; do echo -n "$D: "; fd .rb $D | wc -l ; done ; echo ; sleep 30 ; done | ||
| ``` | ||
| After this is run and done, there will be files left over that | ||
| couldn't be parsed. There will also be a directory with a name like | ||
| `gauntlet.slow.1` containing the files that timed out. What I generally do is wait | ||
| for the first run to end and then start increasing the timeout and run | ||
| again on the timeout dir: | ||
| ``` | ||
| $ ls -d gauntlet.slow.1/*.noindex/?/? | RP_TIMEOUT=30 time xargs -n 1 -P 16 ./gauntlet/bin/gauntlet.rb | ||
| # or: | ||
| $ RP_TIMEOUT=30 time ./gauntlet/bin/gauntlet.rb gauntlet.slow.* | ||
| $ RP_TIMEOUT=60 time ./gauntlet/bin/gauntlet.rb gauntlet.slow.* | ||
| $ fd -tf . gauntlet.slow.60/ | ||
| gauntlet.slow.60/gauntlet.2025-10-22.new.noindex/2/f/f/2ff00bbd2ee63b2145d247570c130823dce2b9fe.rb | ||
| gauntlet.slow.60/gauntlet.2025-10-22.new.noindex/a/a/4/aa44d5a214217036425bf8fce5a7ab5b0e04fd92.rb | ||
| ``` | ||
| for the most part, you wind up with absurdly large generated ruby files: | ||
| ``` | ||
| 10022 $ wc -l gauntlet.slow.60/*/?/?/?/*.rb | ||
| 412444 gauntlet.slow.60/gauntlet.2025-10-22.new.noindex/2/f/f/2ff00bbd2ee63b2145d247570c130823dce2b9fe.rb | ||
| 295249 gauntlet.slow.60/gauntlet.2025-10-22.new.noindex/a/a/4/aa44d5a214217036425bf8fce5a7ab5b0e04fd92.rb | ||
| 707693 total | ||
| ``` | ||
| and I don't care so much about these. |
+29
-0
@@ -0,1 +1,30 @@ | ||
| === 3.22.0 / 2025-12-20 | ||
| * 1 major enhancement: | ||
| * Announced the EOL of ruby_parser! YAY! | ||
| * 6 minor enhancements: | ||
| * 3.3: working down the grammar structural differences. | ||
| * Added tentative 3.4 support. | ||
| * Renamed assignable to old_assignable and added newer cleaner assignable | ||
| * Restructured comparison rake tasks and normalization to deal with lrama changes. | ||
| * Updated compare versions and added 3.4. | ||
| * Updated grammars to fit most closely to ruby 3.2. Working it down the line. | ||
| * 11 bug fixes: | ||
| * Always raise RubyParser::SyntaxError (wrap and raise). | ||
| * Bumped ruby version to 3.2+. | ||
| * Cleaned up user_variable productions to always return sexps. | ||
| * Corrections and enhancements for gauntlet.md instructions. | ||
| * Fixed rake compare's clean subtask to nuke the unpacked ruby dirs entirely. | ||
| * Fixed superclass of V34. | ||
| * Fixed up a couple errors created by using STRICT_SEXP=2. 3 will be nigh impossible. | ||
| * I am nothing if not stubborn: fixed errors caused by STRICT_SEXP=3. | ||
| * Minor fixes for 3.2+ grammars on "defined?" expressions. | ||
| * Removed a lot of ancient compatibility checks and patches. (eg encodings) | ||
| * This in turn cleaned up var_ref and a bunch of other productions. | ||
| === 3.21.1 / 2024-07-09 | ||
@@ -2,0 +31,0 @@ |
@@ -26,4 +26,2 @@ # frozen_string_literal: true | ||
| HAS_ENC = "".respond_to? :encoding | ||
| BTOKENS = { | ||
@@ -30,0 +28,0 @@ ".." => :tBDOT2, |
@@ -1,4 +0,2 @@ | ||
| # encoding: ASCII-8BIT | ||
| # frozen_string_literal: true | ||
| # TODO: remove encoding comment | ||
@@ -33,3 +31,3 @@ require "sexp" | ||
| module RubyParserStuff | ||
| VERSION = "3.21.1" | ||
| VERSION = "3.22.0" | ||
@@ -96,5 +94,2 @@ attr_accessor :lexer, :in_def, :in_single, :file, :in_argdef | ||
| # TODO: remove | ||
| has_enc = "".respond_to? :encoding | ||
| # This is in sorted order of occurrence according to | ||
@@ -113,3 +108,3 @@ # charlock_holmes against 500k files, with UTF_8 forced | ||
| Encoding::EUC_JP | ||
| ] if has_enc | ||
| ] | ||
@@ -231,4 +226,4 @@ JUMP_TYPE = [:return, :next, :break, :yield].map { |k| [k, true] }.to_h | ||
| def endless_method_name defn_or_defs | ||
| name = defn_or_defs[1] | ||
| name = defn_or_defs[2] unless Symbol === name | ||
| _, name, maybe_name, * = defn_or_defs | ||
| name = maybe_name unless Symbol === name | ||
@@ -261,2 +256,32 @@ if attrset_id? name then | ||
| def assignable(lhs, value = nil) | ||
| id, line = lhs.last, lhs.line | ||
| result = | ||
| case lhs.sexp_type | ||
| when :const then | ||
| s(:cdecl, id) | ||
| when :cvar then | ||
| asgn = in_def || in_single > 0 | ||
| s((asgn ? :cvasgn : :cvdecl), id) | ||
| when :gvar then | ||
| s(:gasgn, id) | ||
| when :ivar then | ||
| s(:iasgn, id) | ||
| else | ||
| case self.env[id] | ||
| when :lvar, :dvar, nil then | ||
| self.env[id] ||= :lvar | ||
| s(:lasgn, id) | ||
| else | ||
| raise "wtf? unknown type: #{self.env[id]}" | ||
| end | ||
| end | ||
| result << value if value | ||
| result.line line | ||
| result | ||
| end | ||
| def old_assignable(lhs, value = nil) | ||
| id, line = lhs | ||
@@ -347,3 +372,3 @@ id = id.to_sym | ||
| when :array, :args, :call_args then # HACK? remove array at some point | ||
| result.concat arg.sexp_body | ||
| result.sexp_body += arg.sexp_body | ||
| else | ||
@@ -576,10 +601,9 @@ result << arg | ||
| str = str.dup | ||
| has_enc = str.respond_to? :encoding # TODO: remove | ||
| encoding = nil | ||
| header = str.each_line.first(2) | ||
| header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc | ||
| header.map! { |s| s.force_encoding "ASCII-8BIT" } | ||
| first = header.first || "" | ||
| encoding, str = +"utf-8", str.b[3..-1] if first =~ /\A\xEF\xBB\xBF/ | ||
| encoding, str = +"utf-8", str.b[3..-1] if first =~ /\A\xEF\xBB\xBF/n | ||
@@ -592,11 +616,7 @@ encoding = $1.strip if header.find { |s| | ||
| if encoding then | ||
| if has_enc then | ||
| encoding.sub!(/utf-8-.+$/, "utf-8") # HACK for stupid emacs formats | ||
| hack_encoding str, encoding | ||
| else | ||
| warn "Skipping magic encoding comment" | ||
| end | ||
| encoding.sub!(/utf-8-.+$/, "utf-8") # HACK for stupid emacs formats | ||
| hack_encoding str, encoding | ||
| else | ||
| # nothing specified... ugh. try to encode as utf-8 | ||
| hack_encoding str if has_enc | ||
| hack_encoding str | ||
| end | ||
@@ -656,3 +676,6 @@ | ||
| lineno = head.line | ||
| tail[1] = head.last + tail[1] | ||
| _, h1 = head | ||
| _, t1, *rest = tail | ||
| tail.sexp_body = [h1 + t1, *rest] | ||
| head = tail | ||
@@ -662,4 +685,8 @@ head.line = lineno | ||
| tail.sexp_type = :array | ||
| tail[1] = s(:str, tail[1]).line tail.line | ||
| tail.delete_at 1 if tail[1] == s(:str, "") | ||
| _, tail_s, *tail_r = tail | ||
| if tail_s == "" then | ||
| tail.sexp_body = tail_r | ||
| else | ||
| tail.sexp_body = [s(:str, tail_s).line(tail.line), *tail_r] | ||
| end | ||
@@ -676,5 +703,7 @@ head.push(*tail.sexp_body) | ||
| if head.size == 2 and tail.size > 1 and tail[1].sexp_type == :str then | ||
| head[-1] = head[-1].dup if head[-1].frozen? | ||
| head.last << tail[1].last | ||
| _, t1, * = tail | ||
| if head.size == 2 and tail.size > 1 and t1.sexp_type == :str then | ||
| _, h1 = head | ||
| head.sexp_body = [h1.dup] if h1.frozen? # this is dumb | ||
| head.last << t1.last | ||
| head.sexp_type = :str if head.size == 2 # HACK ? | ||
@@ -861,3 +890,3 @@ else | ||
| result << res | ||
| res = res.resbody(true) | ||
| res = res.find_node :resbody, :delete | ||
| end | ||
@@ -931,4 +960,5 @@ | ||
| result[2..-1].each do |node| | ||
| block = node.block(:delete) | ||
| _, _expr, *cases = result | ||
| cases.each do |node| | ||
| block = node.find_node :block, :delete | ||
| node.concat block.sexp_body if block | ||
@@ -1145,4 +1175,3 @@ end | ||
| name = kw_rest_arg.value | ||
| # TODO: I _hate_ this: | ||
| assignable [name, kw_rest_arg.line] if name != :** | ||
| assignable kw_rest_arg if name != :** | ||
| result << kw_rest_arg | ||
@@ -1350,19 +1379,21 @@ end | ||
| when :str then | ||
| _, str = node | ||
| node.sexp_type = :lit | ||
| node[1] = if k then | ||
| Regexp.new(node[1], o, k) | ||
| else | ||
| begin | ||
| Regexp.new(node[1], o) | ||
| rescue RegexpError => e | ||
| warn "WARNING: #{e.message} for #{node[1].inspect} #{options.inspect}" | ||
| begin | ||
| warn "WARNING: trying to recover with ENC_UTF8" | ||
| Regexp.new(node[1], Regexp::ENC_UTF8) | ||
| rescue RegexpError => e | ||
| warn "WARNING: trying to recover with ENC_NONE" | ||
| Regexp.new(node[1], Regexp::ENC_NONE) | ||
| end | ||
| end | ||
| val = if k then | ||
| Regexp.new(str, o, k) | ||
| else | ||
| begin | ||
| Regexp.new(str, o) | ||
| rescue RegexpError => e | ||
| warn "WARNING: #{e.message} for #{str.inspect} #{options.inspect}" | ||
| begin | ||
| warn "WARNING: trying to recover with ENC_UTF8" | ||
| Regexp.new(str, Regexp::ENC_UTF8) | ||
| rescue RegexpError => e | ||
| warn "WARNING: trying to recover with ENC_NONE" | ||
| Regexp.new(str, Regexp::ENC_NONE) | ||
| end | ||
| end | ||
| end | ||
| node.sexp_body = [val] | ||
| when :dstr then | ||
@@ -1590,3 +1621,3 @@ if options =~ /o/ then | ||
| self.file = file.dup | ||
| self.file = file | ||
@@ -1593,0 +1624,0 @@ @yydebug = ENV.has_key? "DEBUG" |
@@ -41,3 +41,3 @@ require "ruby_parser_extras" | ||
| end | ||
| raise e | ||
| raise RubyParser::SyntaxError, e.message | ||
| end | ||
@@ -87,2 +87,3 @@ | ||
| require "ruby_parser33" | ||
| require "ruby_parser34" | ||
@@ -92,2 +93,3 @@ class RubyParser # HACK | ||
| class V34 < ::Ruby34Parser; end | ||
| class V33 < ::Ruby33Parser; end | ||
@@ -94,0 +96,0 @@ class V32 < ::Ruby32Parser; end |
+1
-0
@@ -33,2 +33,3 @@ .autotest | ||
| lib/ruby_parser33.rb | ||
| lib/ruby_parser34.rb | ||
| lib/ruby_parser_extras.rb | ||
@@ -35,0 +36,0 @@ test/test_ruby_lexer.rb |
+28
-19
@@ -12,3 +12,2 @@ # -*- ruby -*- | ||
| Hoe.add_include_dirs "../../sexp_processor/dev/lib" | ||
| Hoe.add_include_dirs "../../minitest/dev/lib" | ||
| Hoe.add_include_dirs "../../oedipus_lex/dev/lib" | ||
@@ -18,3 +17,3 @@ Hoe.add_include_dirs "../../ruby2ruby/dev/lib" | ||
| V2 = %w[20 21 22 23 24 25 26 27] | ||
| V3 = %w[30 31 32 33] | ||
| V3 = %w[30 31 32 33 34] | ||
@@ -38,3 +37,3 @@ VERS = V2 + V3 | ||
| require_ruby_version [">= 2.6", "< 4"] | ||
| require_ruby_version ">= 3.2" | ||
@@ -149,4 +148,13 @@ if plugin? :perforce then # generated files | ||
| task :compare_build => :generate | ||
| task :compare => :compare_build | ||
| task :compare => :compare_build do | ||
| if ENV["V"] then | ||
| versions = task(:compare_build).prerequisites | ||
| latest_diff = task(versions.last).prerequisites.last | ||
| file = File.read latest_diff | ||
| puts | ||
| puts file | ||
| end | ||
| end | ||
| def ruby_parse version | ||
@@ -171,3 +179,3 @@ v = version[/^\d+\.\d+/].delete "." | ||
| file parse_y => tarball do | ||
| file ruby_dir => tarball do | ||
| extract_glob = case | ||
@@ -184,3 +192,8 @@ when version > "3.3" then | ||
| system "tar xf #{tarball} -C compare #{File.basename ruby_dir}/#{extract_glob}" | ||
| end | ||
| file parse_y => ruby_dir do | ||
| # env -u RUBYOPT rake compare/parse33.y | ||
| warn "Warning: RUBYOPT is set! Use 'env -u RUBYOPT rake'" if ENV["RUBYOPT"] | ||
| # Debugging a new parse build system: | ||
@@ -190,6 +203,6 @@ # | ||
| # | ||
| # % touch parse.y; make -n parse.c | ||
| # % [ -e Makefile ] || ./configure ; make -n -W parse.y parse.c | ||
| # ... | ||
| # echo generating parse.c | ||
| # /Users/ryan/.rubies.current/bin/ruby --disable=gems ./tool/id2token.rb parse.y | \ | ||
| # ruby --disable=gems ./tool/id2token.rb parse.y | \ | ||
| # ruby ./tool/lrama/exe/lrama -oparse.c -Hparse.h - parse.y | ||
@@ -207,8 +220,2 @@ # | ||
| sh cmd | ||
| if File.exist? "#{d}/tool/lrama" then # UGH: this is dumb | ||
| rm_rf "compare/lrama" | ||
| sh "mv #{d}/tool/lrama compare" | ||
| end | ||
| sh "rm -rf #{d}" | ||
| end | ||
@@ -222,4 +229,5 @@ | ||
| file mri_txt => [parse_y, normalize] do | ||
| d = ruby_dir | ||
| if version > "3.3" then | ||
| sh "./compare/lrama/exe/lrama -r all -ocompare/parse#{v}.tab.c #{parse_y}" | ||
| sh "./#{d}/tool/lrama/exe/lrama -r states --report-file=compare/parse#{v}.output -ocompare/parse#{v}.tab.c #{parse_y}" | ||
| else | ||
@@ -259,3 +267,3 @@ sh "#{bison} -r all #{parse_y}" | ||
| task :clean do | ||
| rm_f Dir[mri_txt, rp_txt] | ||
| rm_rf Dir[mri_txt, rp_txt, ruby_dir] | ||
| end | ||
@@ -308,6 +316,7 @@ | ||
| ruby_parse "2.7.8" | ||
| ruby_parse "3.0.6" | ||
| ruby_parse "3.1.4" | ||
| ruby_parse "3.2.2" | ||
| ruby_parse "3.3.0" | ||
| ruby_parse "3.0.7" | ||
| ruby_parse "3.1.7" | ||
| ruby_parse "3.2.9" | ||
| ruby_parse "3.3.9" | ||
| ruby_parse "3.4.5" | ||
@@ -314,0 +323,0 @@ task :debug => :isolate do |
+12
-0
@@ -7,2 +7,14 @@ = ruby_parser | ||
| == NOTICE: | ||
| With the advent of prism, and its ruby_parser compatibility mode, | ||
| this project is EOL. I've switched all my projects that use | ||
| ruby_parser over to prism with only a minimum of fixes sent upstream. | ||
| Use prism ~> 1.7 for best results. | ||
| I'm doing one more release to get my standing changes out into the | ||
| world and might follow up with another if anything adverse is | ||
| reported... but other than that, I'm very happy to put this out to | ||
| pasture. | ||
| == DESCRIPTION: | ||
@@ -9,0 +21,0 @@ |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display