diff --git a/include/syntax/tokens.def b/include/syntax/tokens.def index 9c143ff..2703bcf 100644 --- a/include/syntax/tokens.def +++ b/include/syntax/tokens.def @@ -1,5 +1,7 @@ ADD(Data) +ADD(Shebang) ADD(Comment) +ADD(Error) ADD(String) ADD(Escape) ADD(Interpolation) diff --git a/samples/ruby.rb b/samples/ruby.rb index 580dc57..e93476b 100644 --- a/samples/ruby.rb +++ b/samples/ruby.rb @@ -22,7 +22,7 @@ cjk_samples = [ ] # Ruby regex with unicode -$unicode_regex = /[一-龯ぁ-ん#{0x3000}ァ +$unicode_regex_multiline = /[一-龯ぁ-ん#{0x3000}ァ \-ヶー s wow @@ -30,22 +30,29 @@ s wow 々〆〤]/ UNICORE = %r{ - + [s] {#{}} - + \C-s\u{10} } UNINITCORE = %{ {{#{}}} + test = "A:\x41 B:\101 C:\u0043 D:\u{44 45} NUL:\0 DEL:\c? CTRL_A:\cA META_X:\M-x CTRL_META_X:\C-\M-x MIX:\C-\M-z N:\N{UNICODE NAME}" + } # Unicode identifiers (valid in Ruby) 变量 = 0x5_4eddaee -π = 3.14_159e+2, ?\u0234, ?\,, ?\x0A, ?s, true, false +π = 3.14_159e+2, ?\u0234, ?\,, ?\x0A, ?s, true, false, 0 挨拶 = -> { "こんに \n ちは" } +arr = Array.new() +not_arr = NotArray.new() + +raise NameError or SystemExit or CustomError or Errno or ErrorNotAtAll + # Method using unicode variable names def math_test puts "π * 2 = #{π * 2}" @@ -57,6 +64,7 @@ cjk_samples.each_with_index do |str, idx:| symbol = :" a " + sym2 = :hello end # Test emoji width behaviors @@ -98,10 +106,10 @@ mixed = [ two_docs = <><< +{{{}}}[[[]]](((000))) diff --git a/src/syntax/bash.cc b/src/syntax/bash.cc index c36f504..881cfd6 100644 --- a/src/syntax/bash.cc +++ b/src/syntax/bash.cc @@ -70,3 +70,9 @@ std::shared_ptr bash_parse(std::vector *tokens, } return state; } + +// Single-quoted strings ('...') are strictly literal: no escaping inside. +// Double-quoted strings ("...") allow escaping and interpolation via $var, +// ${var}, $((math)), $(command) and `command` expansions. ANSI-C quoted +// strings ($'...') take backslash escapes, including \xHH, \uHHHH, +// \UHHHHHHHH and \cX. diff --git a/src/syntax/ruby.cc b/src/syntax/ruby.cc index c29f0ce..80e4720 100644 --- 
a/src/syntax/ruby.cc +++ b/src/syntax/ruby.cc @@ -1,6 +1,182 @@ #include "syntax/decl.h" #include "syntax/langs.h" +// TODO: in regex better highlighting of regex structures +// Core class names; ruby_parse emits TokenKind::Type for a match. +const static std::vector types = { + "BasicObject", "Object", "NilClass", + "TrueClass", "FalseClass", "Integer", + "Fixnum", "Bignum", "Float", + "Rational", "Complex", "Numeric", + "String", "Symbol", "Array", + "Hash", "Range", "Regexp", + "Struct", "Enumerator", "Enumerable", + "Time", "Date", "IO", + "File", "Dir", "Thread", + "Proc", "Method", "Module", + "Class", "Mutex", "ConditionVariable", + "MatchData", "Encoding", "Fiber", +}; +// Predefined constants and __magic__ names; emitted as TokenKind::Constant. +const static std::vector builtins = { + "ARGF", + "ARGV", + "ENV", + "STDIN", + "STDOUT", + "STDERR", + "DATA", + "TOPLEVEL_BINDING", + "RUBY_PLATFORM", + "RUBY_VERSION", + "RUBY_RELEASE_DATE", + "RUBY_PATCHLEVEL", + "RUBY_ENGINE", + "__LINE__", + "__FILE__", + "__ENCODING__", + "__dir__", + "__callee__", + "__method__", + "__id__", + "__send__", +}; +// Commonly used built-in method names; emitted as TokenKind::Function. +const static std::vector methods = { + "abort", + "at_exit", + "binding", + "block_given?", + "caller", + "catch", + "chomp", + "chomp!", + "chop", + "chop!", + "eval", + "exec", + "exit", + "exit!", + "fail", + "fork", + "format", + "gets", + "global_variables", + "gsub", + "gsub!", + "iterator?", + "lambda", + "load", + "loop", + "open", + "print", + "printf", + "proc", + "putc", + "puts", + "raise", + "rand", + "readline", + "readlines", + "require", + "require_relative", + "select", + "sleep", + "spawn", + "split", + "sprintf", + "srand", + "sub", + "sub!", + "syscall", + "system", + "test", + "throw", + "trace_var", + "trap", + "untrace_var", + "attr", + "attr_reader", + "attr_writer", + "attr_accessor", + "class_variable_get", + "class_variable_set", + "define_method", + "instance_variable_get", + "instance_variable_set", + "private", + "protected", + "public", + "public_class_method", + "module_function", + "remove_method", + "undef_method", + "method", + "methods", + "singleton_methods", + 
"private_methods", + "protected_methods", + "public_methods", + "send", + "extend", + "include", + "prepend", + "clone", + "dup", + "freeze", + "taint", + "untaint", + "trust", + "untrust", + "tainted?", + "untrusted?", + "each", + "each_with_index", + "each_with_object", + "map", + "collect", + "select", + "reject", + "reduce", + "inject", + "find", + "detect", + "all?", + "any?", + "none?", + "one?", + "count", + "cycle", + "drop", + "drop_while", + "take", + "take_while", + "chunk", + "chunk_while", + "group_by", + "partition", + "slice_before", + "slice_after", + "nil?", + "is_a?", + "kind_of?", + "instance_of?", + "respond_to?", + "equal?", + "object_id", + "class", + "singleton_class", + "clone", + "freeze", + "tap", + "then", +}; +// Exception names lacking an "Error" suffix; emitted as TokenKind::Error. +const static std::vector errors = { + "Exception", "SignalException", "Interrupt", "StopIteration", + "Errno", "SystemExit", "fatal", +}; + const static std::vector base_keywords = { "class", "module", "begin", "end", "else", "rescue", "ensure", "do", "when", }; @@ -119,12 +295,20 @@ std::shared_ptr ruby_parse(std::vector *tokens, static Trie operator_keywords_trie; static Trie expecting_operators_trie; static Trie operator_trie; + static Trie types_trie; + static Trie builtins_trie; + static Trie methods_trie; + static Trie errors_trie; if (!keywords_trie_init) { base_keywords_trie.build(base_keywords); expecting_keywords_trie.build(expecting_keywords); operator_keywords_trie.build(operator_keywords); expecting_operators_trie.build(expecting_operators); operator_trie.build(operators); + types_trie.build(types); + builtins_trie.build(builtins); + methods_trie.build(methods); + errors_trie.build(errors); keywords_trie_init = true; } tokens->clear(); @@ -174,8 +358,60 @@ std::shared_ptr ruby_parse(std::vector *tokens, tokens->push_back({start, i, TokenKind::String}); start = i; i++; - if (i < len) + if (i < len && text[i] == 'x') { i++; + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + } else if (i 
< len && text[i] == 'u') { + i++; + if (i < len && text[i] == '{') { + i++; + while (i < len && text[i] != '}') + i++; + if (i < len) + i++; + } else { + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + } + } else if (i < len && text[i] >= '0' && text[i] <= '7') { + i++; + if (i < len && text[i] >= '0' && text[i] <= '7') + i++; + if (i < len && text[i] >= '0' && text[i] <= '7') + i++; + } else if (i < len && text[i] == 'c') { + i++; + if (i < len && text[i] != '\\') + i++; + } else if (i < len && (text[i] == 'M' || text[i] == 'C')) { + i++; + if (i < len && text[i] == '-') { + i++; + if (i < len && text[i] != '\\') + i++; + } + } else if (i < len && text[i] == 'N') { + i++; + if (i < len && text[i] == '{') { + i++; + while (i < len && text[i] != '}') + i++; + if (i < len) + i++; + } + } else { + if (i < len) + i++; + } tokens->push_back({start, i, TokenKind::Escape}); continue; } @@ -202,11 +438,62 @@ std::shared_ptr ruby_parse(std::vector *tokens, tokens->push_back({start, i, TokenKind::String}); start = i; i++; - if (i < len) + if (i < len && text[i] == 'x') { i++; + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + } else if (i < len && text[i] == 'u') { + i++; + if (i < len && text[i] == '{') { + i++; + while (i < len && text[i] != '}') + i++; + if (i < len) + i++; + } else { + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + } + } else if (i < len && text[i] >= '0' && text[i] <= '7') { + i++; + if (i < len && text[i] >= '0' && text[i] <= '7') + i++; + if (i < len && text[i] >= '0' && text[i] <= '7') + i++; + } else if (i < len && text[i] == 'c') { + i++; + if (i < len && text[i] != '\\') + i++; + } else if (i < len && (text[i] == 'M' || text[i] == 'C')) { + i++; + if (i < len && 
text[i] == '-') { + i++; + if (i < len && text[i] != '\\') + i++; + } + } else if (i < len && text[i] == 'N') { + i++; + if (i < len && text[i] == '{') { + i++; + while (i < len && text[i] != '}') + i++; + if (i < len) + i++; + } + } else { + if (i < len) + i++; + } tokens->push_back({start, i, TokenKind::Escape}); continue; - continue; } if (state->full_state->lit.allow_interp && text[i] == '#' && i + 1 < len && text[i + 1] == '{') { @@ -253,11 +540,62 @@ std::shared_ptr ruby_parse(std::vector *tokens, while (i < len) { if (text[i] == '\\') { tokens->push_back({start, i, TokenKind::Regexp}); - ; start = i; i++; - if (i < len) + if (i < len && text[i] == 'x') { i++; + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + } else if (i < len && text[i] == 'u') { + i++; + if (i < len && text[i] == '{') { + i++; + while (i < len && text[i] != '}') + i++; + if (i < len) + i++; + } else { + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + if (i < len && isxdigit(text[i])) + i++; + } + } else if (i < len && text[i] >= '0' && text[i] <= '7') { + i++; + if (i < len && text[i] >= '0' && text[i] <= '7') + i++; + if (i < len && text[i] >= '0' && text[i] <= '7') + i++; + } else if (i < len && text[i] == 'c') { + i++; + if (i < len && text[i] != '\\') + i++; + } else if (i < len && (text[i] == 'M' || text[i] == 'C')) { + i++; + if (i < len && text[i] == '-') { + i++; + if (i < len && text[i] != '\\') + i++; + } + } else if (i < len && text[i] == 'N') { + i++; + if (i < len && text[i] == '{') { + i++; + while (i < len && text[i] != '}') + i++; + if (i < len) + i++; + } + } else { + if (i < len) + i++; + } tokens->push_back({start, i, TokenKind::Escape}); continue; } @@ -365,6 +703,11 @@ std::shared_ptr ruby_parse(std::vector *tokens, i++; continue; } else if (text[i] == '#') { + if (i == 0 && len > 4 && text[i + 1] == '!') { + state->full_state->expecting_expr = false; 
+ tokens->push_back({0, len, TokenKind::Shebang}); + return state; + } tokens->push_back({i, len, TokenKind::Comment}); state->full_state->expecting_expr = false; return state; @@ -394,7 +737,7 @@ std::shared_ptr ruby_parse(std::vector *tokens, continue; } if (text[i] == '\'' || text[i] == '"') { - tokens->push_back({start, i, TokenKind::Operator}); + tokens->push_back({start, i, TokenKind::Label}); state->full_state->expecting_expr = true; continue; } @@ -793,9 +1136,31 @@ std::shared_ptr ruby_parse(std::vector *tokens, tokens->push_back({i, i + length, TokenKind::KeywordOperator}); i += length; continue; + } else if ((length = types_trie.match(text, i, len, identifier_char))) { + tokens->push_back({i, i + length, TokenKind::Type}); + i += length; + continue; + } else if ((length = methods_trie.match(text, i, len, identifier_char))) { + tokens->push_back({i, i + length, TokenKind::Function}); + i += length; + continue; + } else if ((length = + builtins_trie.match(text, i, len, identifier_char))) { + tokens->push_back({i, i + length, TokenKind::Constant}); + i += length; + continue; + } else if ((length = errors_trie.match(text, i, len, identifier_char))) { + tokens->push_back({i, i + length, TokenKind::Error}); + i += length; + continue; } else if (text[i] >= 'A' && text[i] <= 'Z') { uint32_t start = i; i += get_next_word(text, i, len); + if (i - start >= 5 && text[i - 5] == 'E' && text[i - 4] == 'r' && + text[i - 3] == 'r' && text[i - 2] == 'o' && text[i - 1] == 'r') { + tokens->push_back({start, i, TokenKind::Error}); + continue; + } tokens->push_back({start, i, TokenKind::Constant}); continue; } else { @@ -899,7 +1264,3 @@ std::shared_ptr ruby_parse(std::vector *tokens, } return state; } - -// TODO: Add trie's for builtins and highlight them separately liek (Array / -// self etc) -// And in regex better highlighting of regex structures diff --git a/themes/default.json b/themes/default.json index 0cbd02b..6107e61 100644 --- a/themes/default.json +++ 
b/themes/default.json @@ -2,6 +2,12 @@ "Default": { "fg": "#EEEEEE" }, + "Shebang": { + "fg": "#7dcfff" + }, + "Error": { + "fg": "#EF5168" + }, "Comment": { "fg": "#AAAAAA", "italic": true