diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..061d880804
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+# it's a gem, ignore the lockfile
+Gemfile.lock
+
+# build artifacts
+*.gem
diff --git a/Gemfile b/Gemfile
new file mode 100644
index 0000000000..d62736d70f
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,9 @@
+source 'http://rubygems.org'
+
+gemspec
+
+gem 'minitest'
+gem 'sexp_processor', '~> 3.0'
+gem 'wrong', '~> 0.6.2'
+
+gem 'rake'
diff --git a/Guardfile b/Guardfile
new file mode 100644
index 0000000000..757b163a1f
--- /dev/null
+++ b/Guardfile
@@ -0,0 +1,3 @@
+guard :shell do
+  watch(/\.rb$/) { `rake` }
+end
diff --git a/Rakefile b/Rakefile
new file mode 100644
index 0000000000..6305dfe480
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,15 @@
+require 'rake/clean'
+
+task :spec do
+  spec_files = FileList.new('./spec/**/*_spec.rb')
+  switch_spec_files = spec_files.map { |x| "-r#{x}" }.join(' ')
+  sh "ruby -I./lib -r ./spec/spec_helper #{switch_spec_files} -e MiniTest::Unit.autorun"
+end
+
+CLEAN.include('*.gem')
+task :build => [:clean, :spec] do
+  puts
+  sh "gem build rouge.gemspec"
+end
+
+task :default => :spec
diff --git a/lib/rouge.rb b/lib/rouge.rb
new file mode 100644
index 0000000000..ef1225c63f
--- /dev/null
+++ b/lib/rouge.rb
@@ -0,0 +1,10 @@
+# stdlib
+require 'pathname'
+
+module Rouge
+end
+
+load_dir = Pathname.new(__FILE__).dirname
+load load_dir.join('rouge/token.rb')
+load load_dir.join('rouge/lexer.rb')
+load load_dir.join('rouge/lexers/shell.rb')
diff --git a/lib/rouge/lexer.rb b/lib/rouge/lexer.rb
new file mode 100644
index 0000000000..909d2078d4
--- /dev/null
+++ b/lib/rouge/lexer.rb
@@ -0,0 +1,192 @@
+module Rouge
+  class Lexer
+    def initialize(&b)
+      instance_eval(&b)
+    end
+
+    def default_options
+      {}
+    end
+
+    def options(o={})
+      (@options ||= default_options).merge!(o)
+    end
+
+    def option(k, v=:absent)
+      if v == :absent
+        options[k.to_s]
+      else
+        options({ k.to_s => v })
+      end
+    end
+
+    def debug(&b)
+      puts(b.call) if option :debug
+    end
+
+    def get_tokens(stream)
+      Enumerator.new do |out|
+        stream_tokens(stream) do |token, value|
+          out << [token, value]
+        end
+      end.to_a
+    end
+
+    def stream_tokens(stream)
+      raise 'abstract'
+    end
+  end
+
+  class RegexLexer < Lexer
+    class Rule
+      attr_reader :callback
+      attr_reader :next_lexer
+      attr_reader :re
+      def initialize(re, callback, next_lexer)
+        @re = Regexp.new %/\\A#{re.source}/
+        @callback = callback
+        @next_lexer = next_lexer
+      end
+
+      def consume(stream, &b)
+        # TODO: I'm sure there is a much faster way of doing this.
+        # also, encapsulate the stream in its own class.
+        match = stream.match(@re)
+
+        if match
+          stream.slice!(0...$&.size)
+          yield match
+          return true
+        end
+
+        false
+      end
+    end
+
+    def initialize(parent=nil, &defn)
+      @parent = parent
+      super(&defn)
+    end
+
+    def lexer(name, &defn)
+      name = name.to_s
+
+      if block_given?
+        scope[name] = RegexLexer.new(self, &defn)
+      else
+        scope[name] || parent && parent.lexer(name)
+      end
+    end
+
+    def scope
+      @scope ||= {}
+    end
+
+    def mixin(lexer)
+      rules << lexer
+    end
+
+    def rules
+      @rules ||= []
+    end
+
+    def rule(re, token=nil, next_lexer=nil, &callback)
+      if block_given?
+        next_lexer = token
+      else
+        if token.is_a? String
+          token = Token[token]
+        end
+
+        callback = proc { |match, &b| b.call token, match }
+      end
+
+      rules << Rule.new(re, callback, next_lexer)
+    end
+
+    def step(stream, stack, &b)
+      debug { "parsing #{stream.inspect}" }
+      if stack.empty?
+        raise 'empty stack!'
+      end
+
+      lexer = stack.last
+
+      lexer.rules.each do |rule|
+        rule.consume(stream) do |match|
+
+          return true
+        end
+      end
+
+      return false
+    end
+
+    def stream_tokens(stream, &b)
+      stream = stream.dup
+      stack = [self]
+
+      stream_with_stack(stream.dup, [self], &b)
+    end
+
+    def stream_with_stack(stream, stack, &b)
+      return true if stream.empty?
+
+      until stream.empty?
+        debug { "parsing #{stream.inspect}" }
+        success = stack.last.step(stream, stack, &b)
+
+        if !success
+          debug { " failed parse, returning text" }
+          b.call(Token['Text'], stream)
+          return false
+        end
+      end
+    end
+
+    def step(stream, stack, &b)
+      rules.each do |rule|
+        debug { " trying #{rule.re.inspect}" }
+        return true if run_rule(rule, stream, stack, &b)
+      end
+
+      false
+    end
+
+    private
+    def get_lexer(o)
+      case o
+      when RegexLexer
+        o
+      else
+        lexer o
+      end
+    end
+
+    def run_rule(rule, stream, stack, &b)
+      case rule
+      when String, RegexLexer
+        get_lexer(rule).step(stream, stack, &b)
+      when Rule
+        rule.consume(stream) do |match|
+          debug { " got #{match[0].inspect}" }
+
+          rule.callback.call(*match) do |tok, res|
+            if tok.is_a? String
+              tok = Token[tok]
+            end
+
+            b.call(tok, res)
+          end
+
+          if rule.next_lexer == :pop!
+            stack.pop
+          elsif rule.next_lexer
+            stack.push get_lexer(rule.next_lexer)
+          end
+        end
+      end
+    end
+
+  end
+end
diff --git a/lib/rouge/lexers/shell.rb b/lib/rouge/lexers/shell.rb
new file mode 100644
index 0000000000..d0264c7318
--- /dev/null
+++ b/lib/rouge/lexers/shell.rb
@@ -0,0 +1,84 @@
+module Rouge
+  module Lexers
+    ShellLexer = RegexLexer.new do
+      lexer :basic do
+        rule /
+          \b(if|fi|else|while|do|done|for|then|return|function|case
+          |select|continue|until|esac|elif
+          )\s*\b
+        /x, 'Keyword'
+
+        rule /
+          \b(alias|bg|bind|break|builtin|caller|cd|command|compgen
+          |complete|declare|dirs|disown|echo|enable|eval|exec|exit
+          |export|false|fc|fg|getopts|hash|help|history|jobs|kill|let
+          |local|logout|popd|printf|pushd|pwd|read|readonly|set|shift
+          |shopt|source|suspend|test|time|times|trap|true|type|typeset
+          |ulimit|umask|unalias|unset|wait
+          )\s*\b(?!\.)
+        /x, 'Name.Builtin'
+
+        rule /#.*\n/, 'Comment'
+
+        rule /(\b\w+)(\s*)(=)/ do |_, var, ws, eq, &out|
+          out.call 'Name.Variable', var
+          out.call 'Text', ws
+          out.call 'Operator', eq
+        end
+
+        rule /[\[\]{}()=]/, 'Operator'
+        rule /&&|\|\|/, 'Operator'
+
+        rule /<<"
diff --git a/lib/rouge/token.rb b/lib/rouge/token.rb
+    end
+
+    class << self
+      def get(name)
+        Token[name]
+      end
+      alias [] get
+    end
+
+    Token = new
+    Text = Token[:Text]
+    Whitespace = Token[:Whitespace]
+    Error = Token[:Error]
+
+    Keyword = Token[:Keyword]
+    Name = Token[:Name]
+    Literal = Token[:Literal]
+    String = Literal[:String]
+    Number = Literal[:Number]
+    Punctuation = Token[:Punctuation]
+    Operator = Token[:Operator]
+    Comment = Token[:Comment]
+  end
+end
diff --git a/lib/rouge/version.rb b/lib/rouge/version.rb
new file mode 100644
index 0000000000..f347094ffd
--- /dev/null
+++ b/lib/rouge/version.rb
@@ -0,0 +1,5 @@
+module Rouge
+  def self.version
+    "0.0.1"
+  end
+end
diff --git a/rouge.gemspec b/rouge.gemspec
new file mode 100644
index 0000000000..58445aa1d2
--- /dev/null
+++ b/rouge.gemspec
@@ -0,0 +1,15 @@
+require './lib/rouge/version'
+
+Gem::Specification.new do |s|
+  s.name = "rouge"
+  s.version = Rouge.version
+  s.authors = ["Jay Adkisson"]
+  s.email = ["jjmadkisson@gmail.com"]
+  s.summary = "A pure-ruby colorizer based on pygments"
+  s.description = "see the description for now"
+  s.homepage = "http://github.com/jayferd/rouge"
+  s.rubyforge_project = "rouge"
+  s.files = Dir['Gemfile', 'rouge.gemspec', 'lib/**/*.rb']
+
+  # no dependencies
+end
diff --git a/spec/lexer_spec.rb b/spec/lexer_spec.rb
new file mode 100644
index 0000000000..0c762a03b1
--- /dev/null
+++ b/spec/lexer_spec.rb
@@ -0,0 +1,54 @@
+describe Rouge::Lexer do
+  it 'makes a simple lexer' do
+    a_lexer = Rouge::RegexLexer.new do
+      rule /a/, 'A'
+      rule /b/, 'B'
+    end
+
+    token_A = Rouge::Token[:A]
+    token_B = Rouge::Token[:B]
+    result = a_lexer.get_tokens('aa')
+
+    assert { result.size == 2 }
+    assert { result == [[token_A, 'a']] * 2 }
+  end
+
+  it 'makes sublexers' do
+    a_lexer = Rouge::RegexLexer.new do
+      lexer :brace do
+        rule /b/, 'B'
+        rule /}/, 'Brace', :pop!
+      end
+
+      rule /{/, 'Brace', :brace
+      rule /a/, 'A'
+    end
+
+    result = a_lexer.get_tokens('a{b}a')
+    assert { result.size == 5 }
+
+    # failed parses
+    assert {
+      a_lexer.get_tokens('{a}') ==
+        [[Rouge::Token['Brace'], '{'], [Rouge::Token['Text'], 'a}']]
+    }
+
+    assert { a_lexer.get_tokens('b') == [[Rouge::Token['Text'], 'b']] }
+    assert { a_lexer.get_tokens('}') == [[Rouge::Token['Text'], '}']] }
+  end
+
+  it 'does callbacks' do
+    callback_lexer = Rouge::RegexLexer.new do
+      rule /(a)(b)/ do |_, a, b, &out|
+        out.call 'A', a
+        out.call 'B', b
+      end
+    end
+
+    result = callback_lexer.get_tokens('ab')
+
+    assert { result.size == 2 }
+    assert { result[0] == [Rouge::Token['A'], 'a'] }
+    assert { result[1] == [Rouge::Token['B'], 'b'] }
+  end
+end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
new file mode 100644
index 0000000000..7437ce2e23
--- /dev/null
+++ b/spec/spec_helper.rb
@@ -0,0 +1,16 @@
+require 'rubygems'
+require 'bundler'
+Bundler.require
+
+require 'rouge'
+require 'minitest/spec'
+
+Wrong.config[:color] = true
+
+class MiniTest::Unit::TestCase
+  include Wrong
+end
+
+Dir[File.expand_path('support/**/*.rb', File.dirname(__FILE__))].each {|f|
+  require f
+}
diff --git a/spec/token_spec.rb b/spec/token_spec.rb
new file mode 100644
index 0000000000..a78886230b
--- /dev/null
+++ b/spec/token_spec.rb
@@ -0,0 +1,21 @@
+describe Rouge::Token do
+  it 'has a name' do
+    assert { Rouge::Token::Text.name == 'Text' }
+    assert { Rouge::Token::String.name == 'Literal.String' }
+  end
+
+  it 'can be fetched by name' do
+    assert { Rouge::Token['Text'] == Rouge::Token::Text }
+    assert { Rouge::Token['Literal.String'] == Rouge::Token::String }
+  end
+
+  it 'compares equal values' do
+    assert { Rouge::Token['Foo.Bar'] === Rouge::Token['Foo.Bar'] }
+    deny { Rouge::Token['Foo.Bar'] === Rouge::Token['Foo.Baz'] }
+  end
+
+  it 'compares children' do
+    assert { Rouge::Token['Foo'] === Rouge::Token['Foo.Bar'] }
+    deny { Rouge::Token['Foo.Bar'] === Rouge::Token['Foo'] }
+  end
+end
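For reference, here is a minimal usage sketch of the lexer DSL added above, based only on the API exercised in spec/lexer_spec.rb and spec/token_spec.rb (Rouge::RegexLexer.new, rule, get_tokens, and Token#name). It is not part of the commit: the word_lexer name and the Name/Text token choices are illustrative, and it assumes lib/ is on the load path (e.g. run with ruby -Ilib).

    require 'rouge'

    # A tiny two-rule lexer: runs of word characters become Name tokens,
    # runs of whitespace become Text tokens.
    word_lexer = Rouge::RegexLexer.new do
      rule /\w+/, 'Name'
      rule /\s+/, 'Text'
    end

    # get_tokens returns an array of [token, value] pairs.
    word_lexer.get_tokens('foo bar').each do |token, value|
      puts "#{token.name} #{value.inspect}"
    end
    # expected output (assuming Token#name behaves as in spec/token_spec.rb):
    #   Name "foo"
    #   Text " "
    #   Name "bar"

Sub-lexers pushed with a third argument to rule (and popped again with :pop!) follow the same pattern, as the 'makes sublexers' example in spec/lexer_spec.rb shows.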