Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make type/method-type parser more flexible about input position #1140

Merged
merged 1 commit into from
Nov 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ext/rbs_extension/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ typedef struct {
* */
typedef struct {
VALUE string;
int start_pos; /* The character position that defines the start of the input */
int end_pos; /* The character position that defines the end of the input */
position current; /* The current position */
position start; /* The start position of the current token */
bool first_token_of_line; /* This flag is used for tLINECOMMENT */
Expand Down
12 changes: 9 additions & 3 deletions ext/rbs_extension/lexstate.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,14 @@ int token_bytes(token tok) {
}

/*
 * Look at the character at the lexer's current position without advancing.
 *
 * The result is also stored in state->last_char.
 *
 * This listing shows the pre-change body (the first three statements) together
 * with its replacement (the if/else). In the replacement, reaching
 * state->end_pos is reported as end of input: last_char becomes '\0' and 0 is
 * returned without decoding anything from the string.
 *
 * NOTE(review): the bound is tested with `==`, so a position that is already
 * past end_pos would not be treated as end of input -- confirm callers never
 * start beyond end_pos.
 */
unsigned int peek(lexstate *state) {
unsigned int c = rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string));
state->last_char = c;
return c;
/* End of the requested input window: behave as if a NUL terminator was read. */
if (state->current.char_pos == state->end_pos) {
state->last_char = '\0';
return 0;
} else {
/* Decode one code point at the current byte offset using the string's encoding. */
unsigned int c = rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string));
state->last_char = c;
return c;
}
}

token next_token(lexstate *state, enum TokenType type) {
Expand Down Expand Up @@ -137,6 +142,7 @@ void skip(lexstate *state) {

/*
 * Advance the lexer by `size` characters.
 *
 * Every step calls peek() first, which refreshes state->last_char, and then
 * skip(). NOTE(review): skip() is not visible in this view; presumably it
 * relies on last_char being up to date -- confirm before reordering.
 */
void skipn(lexstate *state, size_t size) {
  size_t remaining = size;

  while (remaining > 0) {
    peek(state);
    skip(state);
    remaining--;
  }
}
Expand Down
39 changes: 28 additions & 11 deletions ext/rbs_extension/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -2502,32 +2502,49 @@ VALUE parse_signature(parserstate *state) {
}

/*
 * Ruby-visible entry point behind RBS::Parser._parse_type.
 *
 * This listing shows pre-change and post-change lines together. In the
 * post-change form the arguments are:
 *   buffer       - object whose content is parsed (handed to alloc_parser)
 *   start_pos    - Fixnum, character position where parsing starts
 *   end_pos      - Fixnum, character position where the input ends
 *   variables    - passed through to alloc_parser
 *   requires_eof - when truthy, the token after the parsed type must be pEOF
 *
 * Returns the parsed type, or nil when the first token is already pEOF.
 */
static VALUE
rbsparser_parse_type(VALUE self, VALUE buffer, VALUE line, VALUE column, VALUE variables)
rbsparser_parse_type(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos, VALUE variables, VALUE requires_eof)
{
parserstate *parser = alloc_parser(buffer, FIX2INT(line), FIX2INT(column), variables);
parserstate *parser = alloc_parser(buffer, FIX2INT(start_pos), FIX2INT(end_pos), variables);

/*
 * Nothing to parse in the requested range.
 * NOTE(review): this return is reached before free_parser(parser) below, so
 * the parser allocated above does not appear to be released on this path.
 */
if (parser->next_token.type == pEOF) {
return Qnil;
}

VALUE type = parse_type(parser);
parser_advance_assert(parser, pEOF);

/* Trailing input is only rejected when the caller asked for it. */
if (RTEST(requires_eof)) {
parser_advance_assert(parser, pEOF);
}

free_parser(parser);

return type;
}

/*
 * Ruby-visible entry point behind RBS::Parser._parse_method_type.
 *
 * This listing shows pre-change and post-change lines together. In the
 * post-change form the arguments are:
 *   buffer       - object whose content is parsed (handed to alloc_parser)
 *   start_pos    - Fixnum, character position where parsing starts
 *   end_pos      - Fixnum, character position where the input ends
 *   variables    - passed through to alloc_parser
 *   requires_eof - when truthy, the token after the method type must be pEOF
 *
 * Returns the parsed method type, or nil when the first token is already pEOF.
 */
static VALUE
rbsparser_parse_method_type(VALUE self, VALUE buffer, VALUE line, VALUE column, VALUE variables)
rbsparser_parse_method_type(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos, VALUE variables, VALUE requires_eof)
{
parserstate *parser = alloc_parser(buffer, FIX2INT(line), FIX2INT(column), variables);
parserstate *parser = alloc_parser(buffer, FIX2INT(start_pos), FIX2INT(end_pos), variables);

/*
 * Nothing to parse in the requested range.
 * NOTE(review): this return is reached before free_parser(parser) below, so
 * the parser allocated above does not appear to be released on this path.
 */
if (parser->next_token.type == pEOF) {
return Qnil;
}

VALUE method_type = parse_method_type(parser);
free(parser);

/* Trailing input is only rejected when the caller asked for it. */
if (RTEST(requires_eof)) {
parser_advance_assert(parser, pEOF);
}

free_parser(parser);

return method_type;
}

static VALUE
rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE line, VALUE column)
rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE end_pos)
{
parserstate *parser = alloc_parser(buffer, FIX2INT(line), FIX2INT(column), Qnil);
parserstate *parser = alloc_parser(buffer, 0, FIX2INT(end_pos), Qnil);
VALUE signature = parse_signature(parser);
free_parser(parser);

Expand All @@ -2536,7 +2553,7 @@ rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE line, VALUE column)

/*
 * Define the RBS::Parser class and register its native singleton methods.
 *
 * This listing shows the pre-change registrations (arity 4, 4, 3) followed by
 * the post-change ones (arity 5, 5, 2). The post-change arities match the
 * argument lists after `self`:
 *   _parse_type / _parse_method_type: buffer, start_pos, end_pos, variables, requires_eof
 *   _parse_signature:                 buffer, end_pos
 */
void rbs__init_parser(void) {
RBS_Parser = rb_define_class_under(RBS, "Parser", rb_cObject);
rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 4);
rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 4);
rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 3);
rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 5);
rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 5);
rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 2);
}
8 changes: 5 additions & 3 deletions ext/rbs_extension/parserstate.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,15 @@ VALUE comment_to_ruby(comment *com, VALUE buffer) {
);
}

parserstate *alloc_parser(VALUE buffer, int line, int column, VALUE variables) {
parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variables) {
VALUE string = rb_funcall(buffer, rb_intern("content"), 0);

lexstate *lexer = calloc(1, sizeof(lexstate));
lexer->string = string;
lexer->current.line = line;
lexer->current.column = column;
lexer->current.line = 1;
lexer->start_pos = start_pos;
lexer->end_pos = end_pos;
skipn(lexer, start_pos);
lexer->start = lexer->current;
lexer->first_token_of_line = lexer->current.column == 0;

Expand Down
2 changes: 1 addition & 1 deletion ext/rbs_extension/parserstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ bool parser_typevar_member(parserstate *state, ID id);
* alloc_parser(buffer, 3, 5, Qnil) // New parserstate without variables
* ```
* */
parserstate *alloc_parser(VALUE buffer, int line, int column, VALUE variables);
parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variables);
void free_parser(parserstate *parser);
/**
* Advance one token.
Expand Down
15 changes: 9 additions & 6 deletions lib/rbs/parser_aux.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@

module RBS
class Parser
# Parse a single type from `source`.
#
# This listing shows the pre-change definition (first two lines) together with
# its replacement. In the replacement:
# - `range` selects the characters to parse: a missing/nil begin falls back to 0
#   and a missing/nil end falls back to `buf.last_position`.
# - `line` and `column` are still accepted but are not used.
# - When no `range` is given, `range.nil?` is passed as the final argument so
#   the native parser requires end of input after the type.
#
# NOTE(review): only `range.end` is read, so `0..5` and `0...5` are treated the
# same -- confirm that inclusive ranges are not expected to differ.
def self.parse_type(source, line: 1, column: 0, variables: [])
_parse_type(buffer(source), line, column, variables)
def self.parse_type(source, line: nil, column: nil, range: nil, variables: [])
buf = buffer(source)
_parse_type(buf, range&.begin || 0, range&.end || buf.last_position, variables, range.nil?)
end

# Parse a single method type from `source`.
#
# This listing shows the pre-change definition (first two lines) together with
# its replacement. In the replacement:
# - `range` selects the characters to parse: a missing/nil begin falls back to 0
#   and a missing/nil end falls back to `buf.last_position`.
# - `line` and `column` are still accepted but are not used.
# - When no `range` is given, `range.nil?` is passed as the final argument so
#   the native parser requires end of input after the method type.
#
# NOTE(review): only `range.end` is read, so `0..5` and `0...5` are treated the
# same -- confirm that inclusive ranges are not expected to differ.
def self.parse_method_type(source, line: 1, column: 0, variables: [])
_parse_method_type(buffer(source), line, column, variables)
def self.parse_method_type(source, line: nil, column: nil, range: nil, variables: [])
buf = buffer(source)
_parse_method_type(buf, range&.begin || 0, range&.end || buf.last_position, variables, range.nil?)
end

# Parse a whole RBS source and return its declarations.
#
# This listing shows the pre-change definition (first two lines) together with
# its replacement. In the replacement, `line` and `column` are still accepted
# but are not used; the buffer's `last_position` is passed to the native parser
# as the end of the input.
def self.parse_signature(source, line: 1, column: 0)
_parse_signature(buffer(source), line, column)
def self.parse_signature(source, line: nil, column: nil)
buf = buffer(source)
_parse_signature(buf, buf.last_position)
end

def self.buffer(source)
Expand Down
52 changes: 41 additions & 11 deletions sig/parser.rbs
Original file line number Diff line number Diff line change
@@ -1,25 +1,55 @@
module RBS
class Parser
def self.parse_method_type: (Buffer | String, ?line: Integer, ?column: Integer, ?variables: Array[Symbol]) -> MethodType

def self.parse_type: (Buffer | String, ?line: Integer, ?column: Integer, ?variables: Array[Symbol]) -> Types::t

def self.parse_signature: (Buffer | String, ?line: Integer, ?column: Integer) -> Array[AST::Declarations::t]
# Parse a method type and return it
#
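# When the `range` keyword is specified, only that character range of the input is parsed: parsing starts at `range.begin` (or the start of the input if `nil`) and stops at `range.end` (or the end of the input if `nil`).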
# If no token is left in the input, it returns `nil`.
#
# ```ruby
# RBS::Parser.parse_method_type("() -> void", range: 0...) # => `() -> void`
# RBS::Parser.parse_method_type("() -> void () -> String", range: 11...) # => `() -> String`
# RBS::Parser.parse_method_type("() -> void () -> String", range: 23...) # => nil
# ```
#
# `line` and `column` are deprecated and are ignored.
#
def self.parse_method_type: (Buffer | String, range: Range[Integer?], ?variables: Array[Symbol]) -> MethodType?
| (Buffer | String, ?line: top, ?column: top, ?variables: Array[Symbol]) -> MethodType

# Parse a type and return it
#
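# When the `range` keyword is specified, only that character range of the input is parsed: parsing starts at `range.begin` (or the start of the input if `nil`) and stops at `range.end` (or the end of the input if `nil`).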
# If no token is left in the input, it returns `nil`.
#
# ```ruby
# RBS::Parser.parse_type("String", range: 0...) # => `String`
# RBS::Parser.parse_type("String Integer", range: 7...) # => `Integer`
# RBS::Parser.parse_type("String Integer", range: 14...) # => nil
# ```
#
# `line` and `column` are deprecated and are ignored.
#
def self.parse_type: (Buffer | String, range: Range[Integer?], ?variables: Array[Symbol]) -> Types::t?
| (Buffer | String, ?line: top, ?column: top, ?variables: Array[Symbol]) -> Types::t

# Parse whole RBS file and return an array of declarations
#
# `line` and `column` are deprecated and are ignored.
#
def self.parse_signature: (Buffer | String) -> Array[AST::Declarations::t]
| (Buffer | String, ?line: top, ?column: top) -> Array[AST::Declarations::t]

KEYWORDS: Hash[String, bot]

private

def self.buffer: (String | Buffer source) -> Buffer

%a{no-defn}
def self._parse_type: (Buffer, Integer line, Integer column, Array[Symbol] variables) -> Types::t
def self._parse_type: (Buffer, Integer start_pos, Integer end_pos, Array[Symbol] variables, boolish eof) -> Types::t?

%a{no-defn}
def self._parse_method_type: (Buffer, Integer line, Integer column, Array[Symbol] variables) -> MethodType
def self._parse_method_type: (Buffer, Integer start_pos, Integer end_pos, Array[Symbol] variables, boolish eof) -> MethodType?

%a{no-defn}
def self._parse_signature: (Buffer, Integer line, Integer column) -> Array[AST::Declarations::t]
def self._parse_signature: (Buffer, Integer end_pos) -> Array[AST::Declarations::t]

class LocatedValue
end
Expand Down
53 changes: 53 additions & 0 deletions test/rbs/parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -647,4 +647,57 @@ def test_neline_inconsitency

RBS::Parser.parse_signature(code)
end

# Checks that `range:` controls where parsing starts and stops, and that the
# reported locations are positions within the whole buffer rather than offsets
# relative to the start of the range.
def test_buffer_location
  code = buffer("type1 type2 type3")

  # Open-ended range from the beginning: only the first type is consumed.
  RBS::Parser.parse_type(code, range: 0...).tap do |type|
    assert_equal "type1", type.to_s
    assert_equal 0...5, type.location.range
  end

  # Starting on the space at position 5: the type that follows begins at 6.
  # (This case previously appeared twice verbatim; the redundant copy is removed.)
  RBS::Parser.parse_type(code, range: 5...).tap do |type|
    assert_equal "type2", type.to_s
    assert_equal 6...11, type.location.range
    assert_equal 1, type.location.start_line
    assert_equal 6, type.location.start_column
    assert_equal 1, type.location.end_line
    assert_equal 11, type.location.end_column
  end

  # Bounded range: input ends at position 8, so only "ty" is visible.
  RBS::Parser.parse_type(code, range: 6...8).tap do |type|
    assert_equal "ty", type.to_s
    assert_equal 6...8, type.location.range
    assert_equal 1, type.location.start_line
    assert_equal 6, type.location.start_column
    assert_equal 1, type.location.end_line
    assert_equal 8, type.location.end_column
  end
end

# Checks that parse_type returns nil when the selected range holds no token.
def test_parse_eof_nil
  code = buffer("type1 ")

  # From the start, the type is parsed as usual.
  parsed = RBS::Parser.parse_type(code, range: 0...)
  assert_equal "type1", parsed.to_s
  assert_equal 0...5, parsed.location.range

  # From position 5 only the trailing space remains, so there is nothing to parse.
  open_ended = RBS::Parser.parse_type(code, range: 5...)
  assert_nil open_ended

  # Same start with an explicit end position.
  bounded = RBS::Parser.parse_type(code, range: 5...8)
  assert_nil bounded
end
end