ruby_parser
Advanced tools
#!/usr/bin/ruby -w

# State shared by the line-scanning loop and the report at the end of
# this script.
good  = false                           # becomes true once a grammar header line is seen
rules = Hash.new { |h, k| h[k] = [] }   # rule name => normalized productions
rule  = nil                             # name of the rule currently being collected
order = []                              # rule names in order of first appearance
##
# Normalizes the text of one grammar production so that reports produced
# by different parser generators can be compared line by line: quoted
# literal tokens are replaced by their token names, keyword_/k_/modifier_
# spellings are folded into kXXX names, "@N " markers are dropped and an
# empty production becomes "none".
#
# s:: the production text.
#
# Returns a new, stripped String. +s+ itself is not modified, so frozen
# strings are accepted.

def munge s
  # NOTE: the table is built inside the method on purpose. The procs read
  # $1, and match variables belong to the method frame that created the
  # proc, so they have to be created in the same frame that calls gsub!.
  renames = [
             "'='", "tEQL",
             "'!'", "tBANG",
             "'%'", "tPERCENT",
             "'&'", "tAMPER2",
             "'('", "tLPAREN2",
             "')'", "tRPAREN",
             "'*'", "tSTAR2",
             "'+'", "tPLUS",
             "','", "tCOMMA",
             "'-'", "tMINUS",
             "'.'", "tDOT",
             "'/'", "tDIVIDE",
             "';'", "tSEMI",
             "':'", "tCOLON",
             "'<'", "tLT",
             "'>'", "tGT",
             "'?'", "tEH",
             "'['", "tLBRACK",
             "'\\n'", "tNL",
             "']'", "tRBRACK",
             "'^'", "tCARET",
             "'`'", "tBACK_REF2",
             "'{'", "tLCURLY",
             "'|'", "tPIPE",
             "'}'", "tRCURLY",
             "'~'", "tTILDE",
             '"["', "tLBRACK",

             # 2.0 changes?
             '"<=>"', "tCMP",
             '"=="', "tEQ",
             '"==="', "tEQQ",
             '"!~"', "tNMATCH",
             '"=~"', "tMATCH",
             '">="', "tGEQ",
             '"<="', "tLEQ",
             '"!="', "tNEQ",
             '"<<"', "tLSHFT",
             '">>"', "tRSHFT",
             '"*"', "tSTAR",

             '".."', "tDOT2",

             '"&"', "tAMPER",
             '"&&"', "tANDOP",
             '"||"', "tOROP",

             '"..."', "tDOT3",
             '"**"', "tPOW",
             '"unary+"', "tUPLUS",
             '"unary-"', "tUMINUS",
             '"[]"', "tAREF",
             '"[]="', "tASET",
             '"::"', "tCOLON2",
             '"{ arg"', "tLBRACE_ARG",
             '"( arg"', "tLPAREN_ARG",
             '"("', "tLPAREN",
             'rparen', "tRPAREN",
             '"{"', "tLBRACE",
             '"=>"', "tASSOC",
             '"->"', "tLAMBDA",
             '":: at EXPR_BEG"', "tCOLON3",
             '"**arg"', "tDSTAR",
             '","', "tCOMMA",

             # other
             'tLBRACK2', "tLBRACK", # HACK

             "' '", "tSPACE", # needs to be later to avoid bad hits

             "/* empty */", "none",
             /^\s*$/, "none",

             "keyword_BEGIN", "klBEGIN",
             "keyword_END", "klEND",
             /keyword_(\w+)/, proc { "k#{$1.upcase}" },
             /\bk_([a-z_]+)/, proc { "k#{$1.upcase}" },
             /modifier_(\w+)/, proc { "k#{$1.upcase}_MOD" },
             "kVARIABLE", "keyword_variable", # ugh

             /@(\d+)\s+/, "",
            ]

  s = s.dup # work on a copy: never clobber (or choke on a frozen) argument

  # The table is a flat list of pattern/replacement pairs, applied in order.
  renames.each_slice(2) do |(a, b)|
    if Proc === b then
      s.gsub!(a, &b)
    else
      s.gsub!(a, b)
    end
  end

  s.strip
end
# Read the parser generator report (files named on the command line, or
# stdin) and collect every production under its rule name.
ARGF.each_line do |line|
  # Ignore everything until a grammar header line has been seen.
  next unless good || line =~ /^-* ?Grammar|\$accept : /

  case line.strip
  when /^$/ then
    # blank line: nothing to record
  when /^\d+ (\$?\w+): (.*)/,        # yacc:  "N name: production"
       /^\d+ (@\d+): (.*)/,          # yacc:  "N @M: production"
       /^rule \d+ (@?\w+):(.*)/ then # racc:  "rule N name: production"
    # Start of a (possibly already known) rule; $1/$2 come from whichever
    # of the three patterns matched.
    rule = $1
    order << rule unless rules.has_key? rule
    rules[rule] << munge($2)
  when /^\d+ \s+\| (.*)/ then        # yacc:  alternative of the current rule
    rules[rule] << munge($1)
  when /\$accept/ then               # byacc?
    good = true
  when /Grammar/ then                # both
    good = true
  when /^-+ Symbols/ then            # racc
    break
  when /^Terminals/ then             # yacc
    break
  when /^\cL/ then                   # byacc
    break
  else
    warn "unparsed: #{$.}: #{line.chomp}"
  end
end
require 'yaml'

# Print every collected rule, in order of first appearance, followed by
# its productions. Rules whose name contains "@" are left out.
order.each do |k|
  next if k =~ /@/
  puts
  puts "#{k}:"
  puts rules[k].map { |r| " #{r}" }.join("\n")
end
| # :stopdoc: | ||
| # WHY do I have to do this?!? | ||
##
# Backfills Regexp option constants that are not defined by the running
# ruby. The ENC_* values are taken from the options of regexp literals
# carrying the matching encoding flag (n, e, s, u).

class Regexp
  ONCE = 0 unless defined? ONCE # FIX: remove this - it makes no sense

  unless defined? ENC_NONE then
    ENC_NONE = /x/n.options
    ENC_EUC  = /x/e.options
    ENC_SJIS = /x/s.options
    ENC_UTF8 = /x/u.options
  end
end
| # I hate ruby 1.9 string changes | ||
##
# Compatibility shim: on rubies where indexing a string yields an integer
# rather than a one-character string, give Fixnum an #ord that returns the
# number itself. Everywhere else the class body is never evaluated.

class Fixnum
  def ord
    self
  end
end unless "a"[0] == "a"
| # :startdoc: | ||
| ############################################################ | ||
| # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK | ||
unless "".respond_to?(:grep) then
  class String
    ##
    # Returns the lines of this string that match +re+ (only defined when
    # String does not already respond to #grep).
    def grep re
      lines.grep re
    end
  end
end
class String
  ##
  # This is a hack used by the lexer to sneak in line numbers at the
  # identifier level. This should be MUCH smaller than making
  # process_token return [value, lineno] and modifying EVERYTHING that
  # reduces tIDENTIFIER.
  #
  # Reads back whatever was assigned with #lineno=; nil if never set.
  attr_accessor :lineno
end
| require "sexp" | ||
##
# ruby_parser specific additions to Sexp (the class itself comes from the
# "sexp" library required above).

class Sexp
  attr_writer :paren

  ##
  # The paren flag; false until something assigns it.
  def paren
    @paren ||= false
  end

  ##
  # Returns the last element. Raises "multi item sexp" when the sexp holds
  # more than two elements.
  def value
    raise "multi item sexp" if size > 2
    last
  end

  ##
  # Always raises: calling to_sym on a Sexp is considered a bug.
  def to_sym
    raise "no: #{self.inspect}.to_sym is a bug"
  end

  alias :add :<<

  ##
  # Appends everything in x.sexp_body to this sexp.
  def add_all x
    self.concat x.sexp_body
  end

  ##
  # True if any direct child is a Sexp whose first element is :block_pass.
  def block_pass?
    any? { |s| Sexp === s && s[0] == :block_pass }
  end

  alias :node_type :sexp_type
  alias :values :sexp_body # TODO: retire
end
| # END HACK | ||
| ############################################################ |
| require "strscan" | ||
##
# StringScanner with a few helpers: character-based position reporting,
# the ability to push text back into the input, and optional tracing of
# getch/scan when the DEBUG environment variable is set.

class RPStringScanner < StringScanner
#   if ENV['TALLY'] then
#     alias :old_getch :getch
#     def getch
#       warn({:getch => caller[0]}.inspect)
#       old_getch
#     end
#   end

  if "".respond_to? :encoding then
    # pos is a byte offset here, so the consumed text has to be cut out by
    # bytes, not by characters.
    if "".respond_to? :byteslice then
      ##
      # The part of the input that lies before the current position.
      def string_to_pos
        string.byteslice(0, pos)
      end
    else
      ##
      # The part of the input that lies before the current position.
      def string_to_pos
        string.bytes.first(pos).pack("c*").force_encoding(string.encoding)
      end
    end

    ##
    # Current position counted in characters.
    def charpos
      string_to_pos.length
    end
  else
    alias :charpos :pos

    # NOTE(review): 0..pos also includes the element at pos, unlike the
    # branches above -- confirm whether that is intended.
    def string_to_pos
      string[0..pos]
    end
  end

  ##
  # Inserts +str+ into the scanned string at the current character
  # position, so it is what gets scanned next. An IndexError from the
  # insertion is swallowed.
  def unread_many str # TODO: remove this entirely - we should not need it
    warn({:unread_many => caller[0]}.inspect) if ENV['TALLY']
    begin
      string[charpos, 0] = str
    rescue IndexError
      # HACK -- this is a bandaid on a dirty rag on an open festering wound
    end
  end

  if ENV['DEBUG'] then
    alias :old_getch :getch
    def getch
      c = self.old_getch
      p :getch => [c, caller.first]
      c
    end

    alias :old_scan :scan
    def scan re
      s = old_scan re
      # caller[1] is nil when scan is called with fewer than two frames
      # above it; to_s keeps the trace from blowing up in that case.
      where = caller[1].to_s.split(/:/).first(2).join(":")
      d :scan => [s, where] if s
      s
    end
  end

  ##
  # Debug helper: writes o.inspect to $stderr.
  def d o
    $stderr.puts o.inspect
  end
end
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
+29
-0
@@ -0,1 +1,30 @@ | ||
| === 3.9.0 / 2017-04-13 | ||
| * 1 major enhancement: | ||
| * Added tentative ruby 2.4 support. Probably missing things. | ||
| * 13 minor enhancements: | ||
| * Added RubyParser.latest. | ||
| * Added RubyParser::Parser.version to make version range comparisons easier. | ||
| * Changed superclasses of all parsers to RubyParser::Parser | ||
| * Cleaned up a lot of the version specific code to be more generic/flexible. | ||
| * Documented how to add new versions in README. | ||
| * Moved RubyParser from ruby_parser_extras.rb into ruby_parser.rb to fix bootstrap issues. | ||
| * Renamed RubyParserStuff#get_match_node to new_match. Aliased and deprecated. | ||
| * Renamed RubyParserStuff#logop to logical_op. Aliased and deprecated. | ||
| * Renamed RubyParserStuff#node_assign to new_assign. Aliased and deprecated. | ||
| * Renamed all parsers to RubyParser::V##. | ||
| * Revamped grammar preprocessing to make adding new versions easier. | ||
| * RubyParser.for_current_ruby falls back to latest if current not available. | ||
| * Subclasses of RubyParser::Parser register themselves into RubyParser::VERSIONS. | ||
| * 4 bug fixes: | ||
| * Fixed `&.` after newline. (presidentbeef) | ||
| * Fixed bug setting line number for hash literals to line of opening brace. | ||
| * Fixed grammar preprocessing bug. | ||
| * Properly handle kDO with no-parens stabby lambda. (presidentbeef) | ||
| === 3.8.4 / 2017-01-13 | ||
@@ -2,0 +31,0 @@ |
+11
-16
@@ -6,5 +6,5 @@ # encoding: UTF-8 | ||
| # :stopdoc: | ||
| RUBY19 = "".respond_to? :encoding | ||
| HAS_ENC = "".respond_to? :encoding | ||
| IDENT_CHAR = if RUBY19 then | ||
| IDENT_CHAR = if HAS_ENC then | ||
| /[\w\u0080-\u{10ffff}]/u | ||
@@ -289,3 +289,3 @@ else | ||
| def ruby22_label? | ||
| ruby22? and is_label_possible? | ||
| ruby22plus? and is_label_possible? | ||
| end | ||
@@ -486,3 +486,3 @@ | ||
| if scan(/([\ \t\r\f\v]*)\./) then | ||
| if scan(/([\ \t\r\f\v]*)(\.|&)/) then | ||
| self.space_seen = true unless ss[1].empty? | ||
@@ -784,3 +784,3 @@ | ||
| self.paren_nest -= 1 | ||
| result(state, :kDO_LAMBDA, value) | ||
| expr_result(:kDO_LAMBDA, value) | ||
| when cond.is_in_state then | ||
@@ -916,9 +916,5 @@ result(state, :kDO_COND, value) | ||
| def ruby18 | ||
| Ruby18Parser === parser | ||
| RubyParser::V18 === parser | ||
| end | ||
| def ruby19 | ||
| Ruby19Parser === parser | ||
| end | ||
| def scan re | ||
@@ -1055,3 +1051,3 @@ ss.scan re | ||
| re = if qwords then | ||
| if RUBY19 then | ||
| if HAS_ENC then | ||
| /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever | ||
@@ -1107,3 +1103,3 @@ else | ||
| end | ||
| x.force_encoding "UTF-8" if RUBY19 | ||
| x.force_encoding "UTF-8" if HAS_ENC | ||
| x | ||
@@ -1116,5 +1112,4 @@ end | ||
| def ruby22? | ||
| Ruby22Parser === parser or | ||
| Ruby23Parser === parser | ||
| def ruby22plus? | ||
| parser.class.version >= 22 | ||
| end | ||
@@ -1131,3 +1126,3 @@ | ||
| if ruby22? && token_type == :tSTRING_END && ["'", '"'].include?(c) then | ||
| if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then | ||
| if (([:expr_beg, :expr_endfn].include?(lex_state) && | ||
@@ -1134,0 +1129,0 @@ !cond.is_in_state) || is_arg?) && |
+34
-237
| # encoding: ASCII-8BIT | ||
| require 'stringio' | ||
| require 'racc/parser' | ||
| require 'sexp' | ||
| require 'strscan' | ||
| require 'ruby_lexer' | ||
| require "sexp" | ||
| require "ruby_lexer" | ||
| require "timeout" | ||
| require "rp_extensions" | ||
| require "rp_stringscanner" | ||
| # :stopdoc: | ||
| # WHY do I have to do this?!? | ||
| class Regexp | ||
| ONCE = 0 unless defined? ONCE # FIX: remove this - it makes no sense | ||
| unless defined? ENC_NONE then | ||
| ENC_NONE = /x/n.options | ||
| ENC_EUC = /x/e.options | ||
| ENC_SJIS = /x/s.options | ||
| ENC_UTF8 = /x/u.options | ||
| end | ||
| end | ||
| # I hate ruby 1.9 string changes | ||
| class Fixnum | ||
| def ord | ||
| self | ||
| end | ||
| end unless "a"[0] == "a" | ||
| # :startdoc: | ||
| class RPStringScanner < StringScanner | ||
| # if ENV['TALLY'] then | ||
| # alias :old_getch :getch | ||
| # def getch | ||
| # warn({:getch => caller[0]}.inspect) | ||
| # old_getch | ||
| # end | ||
| # end | ||
| if "".respond_to? :encoding then | ||
| if "".respond_to? :byteslice then | ||
| def string_to_pos | ||
| string.byteslice(0, pos) | ||
| end | ||
| else | ||
| def string_to_pos | ||
| string.bytes.first(pos).pack("c*").force_encoding(string.encoding) | ||
| end | ||
| end | ||
| def charpos | ||
| string_to_pos.length | ||
| end | ||
| else | ||
| alias :charpos :pos | ||
| def string_to_pos | ||
| string[0..pos] | ||
| end | ||
| end | ||
| def unread_many str # TODO: remove this entirely - we should not need it | ||
| warn({:unread_many => caller[0]}.inspect) if ENV['TALLY'] | ||
| begin | ||
| string[charpos, 0] = str | ||
| rescue IndexError | ||
| # HACK -- this is a bandaid on a dirty rag on an open festering wound | ||
| end | ||
| end | ||
| if ENV['DEBUG'] then | ||
| alias :old_getch :getch | ||
| def getch | ||
| c = self.old_getch | ||
| p :getch => [c, caller.first] | ||
| c | ||
| end | ||
| alias :old_scan :scan | ||
| def scan re | ||
| s = old_scan re | ||
| where = caller[1].split(/:/).first(2).join(":") | ||
| d :scan => [s, where] if s | ||
| s | ||
| end | ||
| end | ||
| def d o | ||
| $stderr.puts o.inspect | ||
| end | ||
| end | ||
| module RubyParserStuff | ||
| VERSION = "3.8.4" unless constants.include? "VERSION" # SIGH | ||
| VERSION = "3.9.0" | ||
@@ -110,4 +26,11 @@ attr_accessor :lexer, :in_def, :in_single, :file | ||
| ruby19 = "".respond_to? :encoding | ||
##
# Defines an instance method named +old+ that prints a DEPRECATED warning
# naming its caller and then forwards all of its arguments to +new+.
def self.deprecate old, new
  define_method old do |*args|
    warn "DEPRECATED: #{old} -> #{new} from #{caller.first}"
    send new, *args
  end
end
| has_enc = "".respond_to? :encoding | ||
| # This is in sorted order of occurrence according to | ||
@@ -126,3 +49,3 @@ # charlock_holmes against 500k files, with UTF_8 forced | ||
| Encoding::EUC_JP | ||
| ] if ruby19 | ||
| ] if has_enc | ||
@@ -346,3 +269,3 @@ def syntax_error msg | ||
| def get_match_node lhs, rhs # TODO: rename to new_match | ||
| def new_match lhs, rhs | ||
| if lhs then | ||
@@ -369,2 +292,5 @@ case lhs[0] | ||
| # TODO: remove in 4.0 or 2018-01, whichever is first | ||
| deprecate :get_match_node, :new_match | ||
| def gettable(id) | ||
@@ -484,3 +410,3 @@ lineno = id.lineno if id.respond_to? :lineno | ||
| def logop(type, left, right) # TODO: rename logical_op | ||
| def logical_op type, left, right | ||
| left = value_expr left | ||
@@ -503,2 +429,5 @@ | ||
| # TODO: remove in 4.0 or 2018-01, whichever is first | ||
| deprecate :logop, :logical_op | ||
| def new_aref val | ||
@@ -700,2 +629,6 @@ val[2] ||= s(:arglist) | ||
##
# Builds a :hash sexp from the values of val[2] and sets its line to
# val[1].
# NOTE(review): presumably val[1] is the line of the opening brace and
# val[2] the parsed pair list -- confirm against the grammar action.
def new_hash val
  s(:hash, *val[2].values).line(val[1])
end
| def new_if c, t, f | ||
@@ -1038,3 +971,3 @@ l = [c.line, t && t.line, f && f.line].compact.min | ||
| def node_assign(lhs, rhs) # TODO: rename new_assign | ||
| def new_assign lhs, rhs | ||
| return nil unless lhs | ||
@@ -1057,2 +990,5 @@ | ||
| # TODO: remove in 4.0 or 2018-01, whichever is first | ||
| deprecate :node_assign, :new_assign | ||
| ## | ||
@@ -1078,7 +1014,7 @@ # Returns a UTF-8 encoded string after processing BOMs and magic | ||
| str = str.dup | ||
| ruby19 = str.respond_to? :encoding | ||
| has_enc = str.respond_to? :encoding | ||
| encoding = nil | ||
| header = str.lines.first(2) | ||
| header.map! { |s| s.force_encoding "ASCII-8BIT" } if ruby19 | ||
| header.map! { |s| s.force_encoding "ASCII-8BIT" } if has_enc | ||
@@ -1094,3 +1030,3 @@ first = header.first || "" | ||
| if encoding then | ||
| if ruby19 then | ||
| if has_enc then | ||
| encoding.sub!(/utf-8-.+$/, 'utf-8') # HACK for stupid emacs formats | ||
@@ -1103,3 +1039,3 @@ hack_encoding str, encoding | ||
| # nothing specified... ugh. try to encode as utf-8 | ||
| hack_encoding str if ruby19 | ||
| hack_encoding str if has_enc | ||
| end | ||
@@ -1436,140 +1372,1 @@ | ||
| end | ||
| class Ruby23Parser < Racc::Parser | ||
| include RubyParserStuff | ||
| end | ||
| class Ruby22Parser < Racc::Parser | ||
| include RubyParserStuff | ||
| end | ||
| class Ruby21Parser < Racc::Parser | ||
| include RubyParserStuff | ||
| end | ||
| class Ruby20Parser < Racc::Parser | ||
| include RubyParserStuff | ||
| end | ||
| class Ruby19Parser < Racc::Parser | ||
| include RubyParserStuff | ||
| end | ||
| class Ruby18Parser < Racc::Parser | ||
| include RubyParserStuff | ||
| end | ||
| ## | ||
| # RubyParser is a compound parser that first attempts to parse using | ||
| # the 1.9 syntax parser and falls back to the 1.8 syntax parser on a | ||
| # parse error. | ||
| class RubyParser | ||
| class SyntaxError < RuntimeError; end | ||
| def initialize | ||
| @p18 = Ruby18Parser.new | ||
| @p19 = Ruby19Parser.new | ||
| @p20 = Ruby20Parser.new | ||
| @p21 = Ruby21Parser.new | ||
| @p22 = Ruby22Parser.new | ||
| @p23 = Ruby23Parser.new | ||
| end | ||
| def process s, f = "(string)", t = 10 | ||
| e = nil | ||
| [@p23, @p22, @p21, @p20, @p19, @p18].each do |parser| | ||
| begin | ||
| return parser.process s, f, t | ||
| rescue Racc::ParseError, RubyParser::SyntaxError => exc | ||
| e = exc | ||
| end | ||
| end | ||
| raise e | ||
| end | ||
| alias :parse :process | ||
| def reset | ||
| @p18.reset | ||
| @p19.reset | ||
| @p20.reset | ||
| @p21.reset | ||
| @p22.reset | ||
| @p23.reset | ||
| end | ||
| def self.for_current_ruby | ||
| case RUBY_VERSION | ||
| when /^1\.8/ then | ||
| Ruby18Parser.new | ||
| when /^1\.9/ then | ||
| Ruby19Parser.new | ||
| when /^2.0/ then | ||
| Ruby20Parser.new | ||
| when /^2.1/ then | ||
| Ruby21Parser.new | ||
| when /^2.2/ then | ||
| Ruby22Parser.new | ||
| when /^2.3/ then | ||
| Ruby23Parser.new | ||
| else | ||
| raise "unrecognized RUBY_VERSION #{RUBY_VERSION}" | ||
| end | ||
| end | ||
| end | ||
| ############################################################ | ||
| # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK | ||
| unless "".respond_to?(:grep) then | ||
| class String | ||
| def grep re | ||
| lines.grep re | ||
| end | ||
| end | ||
| end | ||
| class String | ||
| ## | ||
| # This is a hack used by the lexer to sneak in line numbers at the | ||
| # identifier level. This should be MUCH smaller than making | ||
| # process_token return [value, lineno] and modifying EVERYTHING that | ||
| # reduces tIDENTIFIER. | ||
| attr_accessor :lineno | ||
| end | ||
| class Sexp | ||
| attr_writer :paren | ||
| def paren | ||
| @paren ||= false | ||
| end | ||
| def value | ||
| raise "multi item sexp" if size > 2 | ||
| last | ||
| end | ||
| def to_sym | ||
| raise "no: #{self.inspect}.to_sym is a bug" | ||
| self.value.to_sym | ||
| end | ||
| alias :add :<< | ||
| def add_all x | ||
| self.concat x.sexp_body | ||
| end | ||
| def block_pass? | ||
| any? { |s| Sexp === s && s[0] == :block_pass } | ||
| end | ||
| alias :node_type :sexp_type | ||
| alias :values :sexp_body # TODO: retire | ||
| end | ||
| # END HACK | ||
| ############################################################ |
+86
-7
@@ -1,7 +0,86 @@ | ||
| require 'ruby18_parser' | ||
| require 'ruby19_parser' | ||
| require 'ruby20_parser' | ||
| require 'ruby21_parser' | ||
| require 'ruby22_parser' | ||
| require 'ruby23_parser' | ||
| require 'ruby_parser_extras' | ||
| require "ruby_parser_extras" | ||
| require "racc/parser" | ||
| ## | ||
| # RubyParser is a compound parser that uses all known versions to | ||
| # attempt to parse. | ||
class RubyParser
  # Every registered parser class. #process tries them in this order and
  # .latest / .for_current_ruby treat the first entry as the newest.
  VERSIONS = []

  ##
  # Common superclass of all versioned parsers.
  class Parser < Racc::Parser
    include RubyParserStuff

    # Registers each subclass in RubyParser::VERSIONS as it is defined.
    def self.inherited x
      RubyParser::VERSIONS << x
    end

    # The digits after the trailing "V" of the class name as an Integer
    # (V23 => 23). Returns false when called on Parser itself.
    def self.version
      Parser > self && self.name[/V(\d+)$/, 1].to_i
    end
  end

  class SyntaxError < RuntimeError; end

  ##
  # Runs a fresh instance of every class in VERSIONS, in order, and
  # returns the first result that does not raise Racc::ParseError or
  # RubyParser::SyntaxError. If all of them fail, the last such error is
  # re-raised.
  # NOTE(review): s, f and t are passed through untouched; presumably
  # source, file name and timeout -- confirm against the per-version
  # #process.
  def process s, f = "(string)", t = 10
    e = nil
    VERSIONS.each do |klass|
      parser = klass.new
      begin
        return parser.process s, f, t
      rescue Racc::ParseError, RubyParser::SyntaxError => exc
        e = exc
      end
    end
    raise e
  end

  alias :parse :process

  def reset
    # do nothing
  end

  ##
  # A new instance of the first registered parser class.
  def self.latest
    VERSIONS.first.new
  end

  ##
  # A new parser for the running ruby: RubyParser::V<major><minor> if that
  # constant exists, otherwise the first registered class (with a NOTE
  # printed via warn).
  def self.for_current_ruby
    name  = "V#{RUBY_VERSION[/^\d+\.\d+/].delete "."}"
    klass = if const_defined? name then
              const_get name
            else
              fallback = VERSIONS.first
              warn "NOTE: RubyParser::#{name} undefined, using #{fallback}."
              fallback
            end

    klass.new
  end
end
| ## | ||
| # Unfortunately a problem with racc is that it won't let me namespace | ||
| # properly, so instead of generating RubyParser::V23 directly, I still | ||
| # have to generate the old Ruby23Parser and shove it in as V23. | ||
| require "ruby18_parser" | ||
| require "ruby19_parser" | ||
| require "ruby20_parser" | ||
| require "ruby21_parser" | ||
| require "ruby22_parser" | ||
| require "ruby23_parser" | ||
| require "ruby24_parser" | ||
class RubyParser # HACK
  VERSIONS.clear # also a HACK caused by racc namespace issues

  # Order matters: each subclass is appended to VERSIONS as it is defined,
  # and the first entry is the one handed out as the latest parser.
  class V24 < ::Ruby24Parser; end
  class V23 < ::Ruby23Parser; end
  class V22 < ::Ruby22Parser; end
  class V21 < ::Ruby21Parser; end
  class V20 < ::Ruby20Parser; end
  class V19 < ::Ruby19Parser; end
  class V18 < ::Ruby18Parser; end
end
+5
-0
@@ -8,3 +8,6 @@ .autotest | ||
| bin/ruby_parse_extract_error | ||
| compare/normalize.rb | ||
| lib/.document | ||
| lib/rp_extensions.rb | ||
| lib/rp_stringscanner.rb | ||
| lib/ruby18_parser.rb | ||
@@ -22,2 +25,4 @@ lib/ruby18_parser.y | ||
| lib/ruby23_parser.y | ||
| lib/ruby24_parser.rb | ||
| lib/ruby24_parser.y | ||
| lib/ruby_lexer.rb | ||
@@ -24,0 +29,0 @@ lib/ruby_lexer.rex |
+126
-67
@@ -17,2 +17,6 @@ # -*- ruby -*- | ||
| V1 = %w[18 19] | ||
| V2 = %w[20 21 22 23 24] | ||
| V1_2 = V1 + V2 | ||
| Hoe.spec "ruby_parser" do | ||
@@ -28,41 +32,31 @@ developer "Ryan Davis", "ryand-ruby@zenspider.com" | ||
| if plugin? :perforce then # generated files | ||
| self.perforce_ignore << "lib/ruby18_parser.rb" | ||
| self.perforce_ignore << "lib/ruby19_parser.rb" | ||
| self.perforce_ignore << "lib/ruby20_parser.rb" | ||
| self.perforce_ignore << "lib/ruby20_parser.y" | ||
| self.perforce_ignore << "lib/ruby21_parser.rb" | ||
| self.perforce_ignore << "lib/ruby21_parser.y" | ||
| self.perforce_ignore << "lib/ruby22_parser.rb" | ||
| self.perforce_ignore << "lib/ruby22_parser.y" | ||
| self.perforce_ignore << "lib/ruby23_parser.rb" | ||
| self.perforce_ignore << "lib/ruby23_parser.y" | ||
| V1_2.each do |n| | ||
| self.perforce_ignore << "lib/ruby#{n}_parser.rb" | ||
| end | ||
| V2.each do |n| | ||
| self.perforce_ignore << "lib/ruby#{n}_parser.y" | ||
| end | ||
| self.perforce_ignore << "lib/ruby_lexer.rex.rb" | ||
| end | ||
| self.racc_flags << " -t" if plugin?(:racc) && ENV["DEBUG"] | ||
| if plugin?(:racc) | ||
| self.racc_flags << " -t" if ENV["DEBUG"] | ||
| self.racc_flags << " --superclass RubyParser::Parser" | ||
| # self.racc_flags << " --runtime ruby_parser" # TODO: broken in racc | ||
| end | ||
| end | ||
| file "lib/ruby20_parser.y" => "lib/ruby_parser.yy" do |t| | ||
| sh "unifdef -tk -DRUBY20 -URUBY21 -URUBY22 -URUBY23 -UDEAD #{t.source} > #{t.name} || true" | ||
| V2.each do |n| | ||
| file "lib/ruby#{n}_parser.y" => "lib/ruby_parser.yy" do |t| | ||
| cmd = 'unifdef -tk -DV=%s -UDEAD %s > %s || true' % [n, t.source, t.name] | ||
| sh cmd | ||
| end | ||
| end | ||
| file "lib/ruby21_parser.y" => "lib/ruby_parser.yy" do |t| | ||
| sh "unifdef -tk -URUBY20 -DRUBY21 -URUBY22 -URUBY23 -UDEAD #{t.source} > #{t.name} || true" | ||
| V1_2.each do |n| | ||
| file "lib/ruby#{n}_parser.rb" => "lib/ruby#{n}_parser.y" | ||
| end | ||
| file "lib/ruby22_parser.y" => "lib/ruby_parser.yy" do |t| | ||
| sh "unifdef -tk -URUBY20 -URUBY21 -DRUBY22 -URUBY23 -UDEAD #{t.source} > #{t.name} || true" | ||
| end | ||
| file "lib/ruby23_parser.y" => "lib/ruby_parser.yy" do |t| | ||
| sh "unifdef -tk -URUBY20 -URUBY21 -URUBY22 -DRUBY23 -UDEAD #{t.source} > #{t.name} || true" | ||
| end | ||
| file "lib/ruby18_parser.rb" => "lib/ruby18_parser.y" | ||
| file "lib/ruby19_parser.rb" => "lib/ruby19_parser.y" | ||
| file "lib/ruby20_parser.rb" => "lib/ruby20_parser.y" | ||
| file "lib/ruby21_parser.rb" => "lib/ruby21_parser.y" | ||
| file "lib/ruby22_parser.rb" => "lib/ruby22_parser.y" | ||
| file "lib/ruby23_parser.rb" => "lib/ruby23_parser.y" | ||
| file "lib/ruby_lexer.rex.rb" => "lib/ruby_lexer.rex" | ||
@@ -99,28 +93,106 @@ | ||
| # to create parseXX.output: | ||
| # | ||
| # 1) check out the XX version of ruby | ||
| # 2) Edit uncommon.mk, find the ".y.c" rule and remove the RM lines | ||
| # 3) run `rm -f parse.c; make parse.c` | ||
| # 4) run `bison -r all parse.tmp.y` | ||
| # 5) mv parse.tmp.output parseXX.output | ||
# Runs the given block with the working directory switched to "compare"
# and returns the block's result; the previous directory is restored
# afterwards.
def in_compare
  Dir.chdir "compare" do
    yield
  end
end
| # possibly new instructions: | ||
| # | ||
| # 1) check out the XX version of ruby | ||
| # 2) YFLAGS="-r all" make parse.c | ||
| # 3) mv y.output parseXX.output | ||
# Downloads the ruby-<v>.tar.bz2 source tarball from cache.ruby-lang.org
# into the current directory, unless a file with that name is already
# there.
#
# v:: full version string, e.g. "2.3.3" or "1.9.3-p551"; its leading
#     "major.minor" part selects the directory on the server.
def dl v
  dir  = v[/^\d+\.\d+/]
  url  = "https://cache.ruby-lang.org/pub/ruby/#{dir}/ruby-#{v}.tar.bz2"
  path = File.basename url

  unless File.exist? path then
    # argv form: the URL is handed to curl as-is, no shell in between
    system "curl", "-O", url
  end
end
| %w[18 19 20 21 22 23].each do |v| | ||
| task "compare#{v}" do | ||
| sh "./yack.rb lib/ruby#{v}_parser.output > racc#{v}.txt" | ||
| sh "./yack.rb parse#{v}.output > yacc#{v}.txt" | ||
| sh "diff -du racc#{v}.txt yacc#{v}.txt || true" | ||
| puts | ||
| sh "diff -du racc#{v}.txt yacc#{v}.txt | wc -l" | ||
# Defines the rake tasks that compare ruby_parser's grammar for one ruby
# version against MRI's own grammar:
#
# * file tasks that download the MRI tarball, extract parse.y, run bison
#   on it and normalize both reports inside compare/,
# * "compareNN" (and the umbrella :compare) to produce and size the diff,
# * additions to :clean and :realclean for the generated files.
#
# version:: full MRI version string, e.g. "2.3.3" or "1.9.3-p551".
def ruby_parse version
  v         = version[/^\d+\.\d+/].delete "."
  rp_txt    = "rp#{v}.txt"
  mri_txt   = "mri#{v}.txt"
  parse_y   = "parse#{v}.y"
  tarball   = "ruby-#{version}.tar.bz2"
  ruby_dir  = "ruby-#{version}"
  diff      = "diff#{v}.diff"
  rp_out    = "lib/ruby#{v}_parser.output"

  # the same files as seen from the project root
  c_diff    = "compare/#{diff}"
  c_rp_txt  = "compare/#{rp_txt}"
  c_mri_txt = "compare/#{mri_txt}"
  c_parse_y = "compare/#{parse_y}"
  c_tarball = "compare/#{tarball}"

  # The task has to be named after the path relative to the project root
  # (c_tarball): that is what c_parse_y depends on, and dl stores the file
  # inside compare/.
  file c_tarball do
    in_compare do
      dl version
    end
  end

  file c_parse_y => c_tarball do
    in_compare do
      system "tar yxf #{tarball} #{ruby_dir}/{id.h,parse.y,tool/{id2token.rb,vpath.rb}}"

      Dir.chdir ruby_dir do
        if File.exist? "tool/id2token.rb" then
          sh "ruby tool/id2token.rb --path-separator=.:./ id.h parse.y > ../#{parse_y}"
        else
          cp "parse.y", "../#{parse_y}"
        end
      end

      sh "rm -rf #{ruby_dir}"
    end
  end

  file c_mri_txt => c_parse_y do
    in_compare do
      sh "bison -r all #{parse_y}"
      sh "./normalize.rb parse#{v}.output > #{mri_txt}"
      rm ["parse#{v}.output", "parse#{v}.tab.c"]
    end
  end

  file rp_out => :parser

  file c_rp_txt => rp_out do
    in_compare do
      sh "./normalize.rb ../#{rp_out} > #{rp_txt}"
    end
  end

  compare = "compare#{v}"

  desc "Compare all grammars to MRI"
  task :compare => compare

  task c_diff => [c_mri_txt, c_rp_txt] do
    in_compare do
      # plain system: diff exits non-zero whenever the files differ
      system "diff -du #{mri_txt} #{rp_txt} > #{diff}"
    end
  end

  desc "Compare #{v} grammar to MRI #{version}"
  task compare => c_diff do
    in_compare do
      system "wc -l #{diff}"
    end
  end

  task :clean do
    rm_f Dir[c_parse_y, c_mri_txt, c_rp_txt]
  end

  task :realclean do
    rm_f Dir[c_tarball]
  end
end
| ruby_parse "1.8.7-p374" | ||
| ruby_parse "1.9.3-p551" | ||
| ruby_parse "2.0.0-p648" | ||
| ruby_parse "2.1.9" | ||
| ruby_parse "2.2.6" | ||
| ruby_parse "2.3.3" | ||
| # TODO ruby_parse "2.4.0" | ||
| task :debug => :isolate do | ||
| ENV["V"] ||= "23" | ||
| ENV["V"] ||= V1_2.last | ||
| Rake.application[:parser].invoke # this way we can have DEBUG set | ||
@@ -133,18 +205,5 @@ Rake.application[:lexer].invoke # this way we can have DEBUG set | ||
| parser = case ENV["V"] | ||
| when "18" then | ||
| Ruby18Parser.new | ||
| when "19" then | ||
| Ruby19Parser.new | ||
| when "20" then | ||
| Ruby20Parser.new | ||
| when "21" then | ||
| Ruby21Parser.new | ||
| when "22" then | ||
| Ruby22Parser.new | ||
| when "23" then | ||
| Ruby23Parser.new | ||
| else | ||
| raise "Unsupported version #{ENV["V"]}" | ||
| end | ||
| klass = Object.const_get("Ruby#{ENV["V"]}Parser") rescue nil | ||
| raise "Unsupported version #{ENV["V"]}" unless klass | ||
| parser = klass.new | ||
@@ -180,3 +239,3 @@ time = (ENV["RP_TIMEOUT"] || 10).to_i | ||
| task :extract => :isolate do | ||
| ENV["V"] ||= "19" | ||
| ENV["V"] ||= V1_2.last | ||
| Rake.application[:parser].invoke # this way we can have DEBUG set | ||
@@ -183,0 +242,0 @@ |
+12
-0
@@ -60,2 +60,14 @@ = ruby_parser | ||
| == DEVELOPER NOTES: | ||
| To add a new version: | ||
| * New parser should be generated from lib/ruby_parser.yy. | ||
| * Extend lib/ruby_parser.yy with new class name. | ||
| * Add new version number to Rakefile for rule creation. | ||
| * Require generated parser in lib/ruby_parser.rb. | ||
| * Add empty TestRubyParserShared##Plus module and TestRubyParserV## to test/test_ruby_parser.rb. | ||
| * Extend Manifest.txt with generated file names. | ||
| * Extend sexp_processor's pt_testcase.rb to match version. | ||
| == REQUIREMENTS: | ||
@@ -62,0 +74,0 @@ |
| # encoding: US-ASCII | ||
| require 'rubygems' | ||
| require 'minitest/autorun' | ||
| require 'ruby_parser_extras' | ||
| require "minitest/autorun" | ||
| require "ruby_parser_extras" | ||
| require "ruby_parser" | ||
| require 'minitest/test' | ||
| class TestStackState < Minitest::Test | ||
@@ -18,3 +16,3 @@ attr_reader :s | ||
| orig_str = str.dup | ||
| p = Ruby19Parser.new | ||
| p = RubyParser.latest | ||
| s = nil | ||
@@ -21,0 +19,0 @@ |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display