Skip to content

Commit

Permalink
Merge pull request #182 from gcluzel/better-error-messages
Browse files Browse the repository at this point in the history
Improve the error messages displayed by the JSON5 lexer and parser.
  • Loading branch information
Leonidas-from-XIV authored Jun 27, 2024
2 parents 08b564b + 1f516e8 commit 93f3496
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 84 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## unreleased

### Added

- Add locations in the JSON5 parser error messages (@gcluzel, #182)

## 2.2.1

*2024-06-04*
Expand Down
4 changes: 4 additions & 0 deletions lib/json5/errors.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
let string_of_position { Lexing.pos_lnum; pos_fname; _ } =
match pos_fname with
| "" -> Printf.sprintf "Line %d" pos_lnum
| fname -> Printf.sprintf "File %s, line %d" fname pos_lnum
4 changes: 4 additions & 0 deletions lib/json5/errors.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
val string_of_position : Lexing.position -> string
(** [string_of_position pos] returns a string that contains the line and, if
supplied, the filename of the position in a way that's appropriate to include
in an error message *)
107 changes: 57 additions & 50 deletions lib/json5/lexer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ type token =
| CLOSE_BRACKET
| COLON
| COMMA
| COMMENT of string
| TRUE
| FALSE
| NULL
Expand All @@ -18,25 +17,38 @@ type token =
| INT of string
| STRING of string
| IDENTIFIER_NAME of string

let pp_token ppf = function
| OPEN_PAREN -> Format.fprintf ppf "'('"
| CLOSE_PAREN -> Format.fprintf ppf "')'"
| OPEN_BRACE -> Format.fprintf ppf "'{'"
| CLOSE_BRACE -> Format.fprintf ppf "'}'"
| OPEN_BRACKET -> Format.fprintf ppf "'['"
| CLOSE_BRACKET -> Format.fprintf ppf "']'"
| COLON -> Format.fprintf ppf "':'"
| COMMA -> Format.fprintf ppf "','"
| COMMENT s -> Format.fprintf ppf "COMMENT '%s'" s
| TRUE -> Format.fprintf ppf "'true'"
| FALSE -> Format.fprintf ppf "'false'"
| NULL -> Format.fprintf ppf "'null'"
| FLOAT s -> Format.fprintf ppf "FLOAT '%s'" s
| INT_OR_FLOAT s -> Format.fprintf ppf "INT_OR_FLOAT '%s'" s
| INT s -> Format.fprintf ppf "INT '%s'" s
| STRING s -> Format.fprintf ppf "STRING '%s'" s
| IDENTIFIER_NAME s -> Format.fprintf ppf "IDENTIFIER_NAME '%s'" s
| EOF

let pp_token ppf =
let ps = Format.pp_print_string ppf in
let pf = Format.fprintf ppf in
function
| OPEN_PAREN -> ps "'('"
| CLOSE_PAREN -> ps "')'"
| OPEN_BRACE -> ps "'{'"
| CLOSE_BRACE -> ps "'}'"
| OPEN_BRACKET -> ps "'['"
| CLOSE_BRACKET -> ps "']'"
| COLON -> ps "':'"
| COMMA -> ps "','"
| TRUE -> ps "'true'"
| FALSE -> ps "'false'"
| NULL -> ps "'null'"
| FLOAT s -> pf "FLOAT %S" s
| INT_OR_FLOAT s -> pf "INT_OR_STRING %S" s
| INT s -> pf "INT %S" s
| STRING s -> pf "%S" s
| IDENTIFIER_NAME s -> pf "IDENTIFIER_NAME %S" s
| EOF -> ps "EOF"

let lexer_error lexbuf =
let pos_start, _pos_end = Sedlexing.lexing_positions lexbuf in
let location = Errors.string_of_position pos_start in
let msg =
Printf.sprintf "%s: Unexpected character '%s'" location
(Sedlexing.Utf8.lexeme lexbuf)
in
Error msg

let source_character = [%sedlex.regexp? any]
let line_terminator = [%sedlex.regexp? 0x000A | 0x000D | 0x2028 | 0x2029]
Expand Down Expand Up @@ -182,10 +194,7 @@ let string_lex_single lexbuf strbuf =
| Sub (source_character, ('\'' | line_terminator)) ->
Buffer.add_string strbuf (lexeme lexbuf);
lex lexbuf strbuf
| _ ->
lexeme lexbuf
|> Format.sprintf "Unexpected character: %s"
|> Result.error
| _ -> lexer_error lexbuf
in
lex lexbuf strbuf

Expand All @@ -202,50 +211,48 @@ let string_lex_double lexbuf strbuf =
| Sub (source_character, ('"' | line_terminator)) ->
Buffer.add_string strbuf (lexeme lexbuf);
lex lexbuf strbuf
| _ ->
lexeme lexbuf
|> Format.sprintf "Unexpected character: %s"
|> Result.error
| _ -> lexer_error lexbuf
in
lex lexbuf strbuf

let string_lex lexbuf quote =
let strbuf = Buffer.create 200 in
if quote = "'" then string_lex_single lexbuf strbuf
else if quote = {|"|} then string_lex_double lexbuf strbuf
else Error (Format.sprintf "Invalid string quote %S" quote)
match quote with
| "'" -> string_lex_single lexbuf strbuf
| {|"|} -> string_lex_double lexbuf strbuf
| _ -> Error (Printf.sprintf "Invalid string quote %S" quote)

let rec lex tokens buf =
let lexeme = Sedlexing.Utf8.lexeme in
let pos, _ = Sedlexing.lexing_positions buf in
match%sedlex buf with
| '(' -> lex (OPEN_PAREN :: tokens) buf
| ')' -> lex (CLOSE_PAREN :: tokens) buf
| '{' -> lex (OPEN_BRACE :: tokens) buf
| '}' -> lex (CLOSE_BRACE :: tokens) buf
| '[' -> lex (OPEN_BRACKET :: tokens) buf
| ']' -> lex (CLOSE_BRACKET :: tokens) buf
| ':' -> lex (COLON :: tokens) buf
| ',' -> lex (COMMA :: tokens) buf
| '(' -> lex ((OPEN_PAREN, pos) :: tokens) buf
| ')' -> lex ((CLOSE_PAREN, pos) :: tokens) buf
| '{' -> lex ((OPEN_BRACE, pos) :: tokens) buf
| '}' -> lex ((CLOSE_BRACE, pos) :: tokens) buf
| '[' -> lex ((OPEN_BRACKET, pos) :: tokens) buf
| ']' -> lex ((CLOSE_BRACKET, pos) :: tokens) buf
| ':' -> lex ((COLON, pos) :: tokens) buf
| ',' -> lex ((COMMA, pos) :: tokens) buf
| Chars {|"'|} ->
let* s = string_lex buf (lexeme buf) in
lex (STRING s :: tokens) buf
lex ((STRING s, pos) :: tokens) buf
| multi_line_comment | single_line_comment | white_space | line_terminator ->
lex tokens buf
| "true" -> lex (TRUE :: tokens) buf
| "false" -> lex (FALSE :: tokens) buf
| "null" -> lex (NULL :: tokens) buf
| "true" -> lex ((TRUE, pos) :: tokens) buf
| "false" -> lex ((FALSE, pos) :: tokens) buf
| "null" -> lex ((NULL, pos) :: tokens) buf
| json5_float ->
let s = lexeme buf in
lex (FLOAT s :: tokens) buf
lex ((FLOAT s, pos) :: tokens) buf
| json5_int ->
let s = lexeme buf in
lex (INT s :: tokens) buf
lex ((INT s, pos) :: tokens) buf
| json5_int_or_float ->
let s = lexeme buf in
lex (INT_OR_FLOAT s :: tokens) buf
lex ((INT_OR_FLOAT s, pos) :: tokens) buf
| identifier_name ->
let s = lexeme buf in
lex (IDENTIFIER_NAME s :: tokens) buf
| eof -> Ok (List.rev tokens)
| _ ->
lexeme buf |> Format.asprintf "Unexpected character: '%s'" |> Result.error
lex ((IDENTIFIER_NAME s, pos) :: tokens) buf
| eof -> Ok (List.rev ((EOF, pos) :: tokens))
| _ -> lexer_error buf
77 changes: 49 additions & 28 deletions lib/json5/parser.ml
Original file line number Diff line number Diff line change
@@ -1,46 +1,69 @@
open Let_syntax.Result

let parser_error pos error =
let location = Errors.string_of_position pos in
let msg = Printf.sprintf "%s: %s" location error in
Error msg

let rec parse_list acc = function
| [] -> Error "List never ends"
| Lexer.CLOSE_BRACKET :: xs -> Ok (acc, xs)
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> parser_error pos "Unexpected end of input"
| (Lexer.CLOSE_BRACKET, _) :: xs -> Ok (acc, xs)
| xs -> (
let* v, xs = parse xs in
match xs with
| [] -> Error "List was not closed"
| Lexer.CLOSE_BRACKET :: xs | COMMA :: CLOSE_BRACKET :: xs ->
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> parser_error pos "Unexpected end of input"
| (Lexer.CLOSE_BRACKET, _) :: xs | (COMMA, _) :: (CLOSE_BRACKET, _) :: xs
->
Ok (v :: acc, xs)
| COMMA :: xs -> parse_list (v :: acc) xs
| x :: _ ->
| (COMMA, _) :: xs -> parse_list (v :: acc) xs
| (x, pos) :: _ ->
let s =
Format.asprintf "Unexpected list token: %a" Lexer.pp_token x
in
Error s)
parser_error pos s)

and parse_assoc acc = function
| [] -> Error "Assoc never ends"
| Lexer.CLOSE_BRACE :: xs -> Ok (acc, xs)
| STRING k :: COLON :: xs | IDENTIFIER_NAME k :: COLON :: xs -> (
let* v, xs = parse xs in
let item = (k, v) in
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> parser_error pos "Unexpected end of input"
| (CLOSE_BRACE, _) :: xs -> Ok (acc, xs)
| (STRING k, _) :: xs | (IDENTIFIER_NAME k, _) :: xs -> (
match xs with
| [] -> Error "Object was not closed"
| Lexer.CLOSE_BRACE :: xs | COMMA :: CLOSE_BRACE :: xs ->
Ok (item :: acc, xs)
| COMMA :: xs -> parse_assoc (item :: acc) xs
| x :: _ ->
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> parser_error pos "Unexpected end of input"
| (Lexer.COLON, _) :: xs -> (
let* v, xs = parse xs in
let item = (k, v) in
match xs with
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> parser_error pos "Unexpected end of input"
| (CLOSE_BRACE, _) :: xs | (COMMA, _) :: (CLOSE_BRACE, _) :: xs ->
Ok (item :: acc, xs)
| (COMMA, _) :: xs -> parse_assoc (item :: acc) xs
| (x, pos) :: _ ->
let s =
Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token
x
in
parser_error pos s)
| (x, pos) :: _ ->
let s =
Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x
Format.asprintf "Expected %a but found %a" Lexer.pp_token
Lexer.COLON Lexer.pp_token x
in
Error s)
| x :: _ ->
parser_error pos s)
| (x, pos) :: _ ->
let s =
Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x
Format.asprintf "Expected string or identifier but found %a"
Lexer.pp_token x
in
Error s
parser_error pos s

and parse = function
| [] -> Error "empty list of tokens"
| token :: xs -> (
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> parser_error pos "Unexpected end of input"
| (token, pos) :: xs -> (
match token with
| TRUE -> Ok (Ast.Bool true, xs)
| FALSE -> Ok (Bool false, xs)
Expand All @@ -57,12 +80,10 @@ and parse = function
(Ast.Assoc (List.rev a), xs)
| x ->
let s = Format.asprintf "Unexpected token: %a" Lexer.pp_token x in
Error s)
parser_error pos s)

let parse_from_lexbuf ?fname ?lnum lexbuffer =
let fname = Option.value fname ~default:"" in
let parse_from_lexbuf ?(fname = "") ?(lnum = 1) lexbuffer =
Sedlexing.set_filename lexbuffer fname;
let lnum = Option.value lnum ~default:1 in
let pos =
{ Lexing.pos_fname = fname; pos_lnum = lnum; pos_bol = 0; pos_cnum = 0 }
in
Expand Down
26 changes: 20 additions & 6 deletions test_json5/test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,23 @@ module M = Yojson_five.Safe

let yojson = Alcotest.testable M.pp M.equal

let parsing_test_case name expected input =
(* any error message will match the string. *)
let any_string = Alcotest.testable Fmt.string (fun _ _ -> true)

let parsing_test_case name error_msg expected input =
Alcotest.test_case name `Quick (fun () ->
(* any error message will do *)
let any_string = Alcotest.testable Fmt.string (fun _ _ -> true) in
Alcotest.(check (result yojson any_string))
Alcotest.(check (result yojson error_msg))
name expected (M.from_string input))

let parsing_should_succeed name input expected =
parsing_test_case name (Ok expected) input
parsing_test_case name Alcotest.string (Ok expected) input

let parsing_should_fail name input =
let failure = Error "<anything>" in
parsing_test_case name failure input
parsing_test_case name any_string failure input

let parsing_should_fail_with_error name input expected =
parsing_test_case name Alcotest.string (Error expected) input

let parsing_tests =
[
Expand Down Expand Up @@ -101,6 +105,16 @@ No \\n's!",
"backwardsCompatible": "with JSON",
}|}
expected);
parsing_should_fail_with_error "unexpected EOF in list" "[1, 2,"
"Line 1: Unexpected end of input";
parsing_should_fail_with_error "unexpected EOF on different line" "\n[1, 2,"
"Line 2: Unexpected end of input";
parsing_should_fail_with_error "unexpected EOF in assoc" {|{"foo": 1,|}
"Line 1: Unexpected end of input";
parsing_should_fail_with_error "missing colon in assoc" {|{"foo"}|}
"Line 1: Expected ':' but found '}'";
parsing_should_fail_with_error "bad identifier in assoc" {|{[0]}|}
"Line 1: Expected string or identifier but found '['";
]

let writing_test_case name input expected =
Expand Down

0 comments on commit 93f3496

Please sign in to comment.