Skip to content

Commit 898f1a2

Browse files
authored
Reject unsupported newlines in tokenizer ASCII fast path (#15262)
1 parent 603602e commit 898f1a2

2 files changed

Lines changed: 33 additions & 0 deletions

File tree

lib/elixir/src/elixir_interpolation.erl

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,15 @@ extract([$\\ | Rest], Buffer, Output, Line, Column, Scope, Interpol, Last) ->
8080

8181
%% Catch all clause
8282

83+
extract([Char | _Rest], _Buffer, _Output, Line, Column, _Scope, _Interpol, _Last)
84+
when ?break(Char) ->
85+
Token = io_lib:format("\\u~4.16.0B", [Char]),
86+
Pos = io_lib:format(". If you want to use such character, use it in its escaped ~ts form instead", [Token]),
87+
{error, {?LOC(Line, Column),
88+
{"invalid line break character in string: ",
89+
Pos},
90+
Token}};
91+
8392
extract([Char1, Char2 | Rest], Buffer, Output, Line, Column, Scope, Interpol, Last)
8493
when Char1 =< 255, Char2 =< 255 ->
8594
extract([Char2 | Rest], [Char1 | Buffer], Output, Line, Column + 1, Scope, Interpol, Last);

lib/elixir/test/elixir/kernel/parser_test.exs

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1131,6 +1131,30 @@ defmodule Kernel.ParserTest do
11311131
],
11321132
~c"\"this is a \\\u2028\""
11331133
)
1134+
1135+
assert_syntax_error(
1136+
[
1137+
"nofile:1:12:",
1138+
"invalid line break character in string: \\u000B. If you want to use such character, use it in its escaped \\u000B form instead"
1139+
],
1140+
:erlang.list_to_binary([34] ++ ~c"this is a " ++ [11, 34])
1141+
)
1142+
1143+
assert_syntax_error(
1144+
[
1145+
"nofile:1:12:",
1146+
"invalid line break character in string: \\u000C. If you want to use such character, use it in its escaped \\u000C form instead"
1147+
],
1148+
:erlang.list_to_binary([34] ++ ~c"this is a " ++ [12, 34])
1149+
)
1150+
1151+
assert_syntax_error(
1152+
[
1153+
"nofile:1:12:",
1154+
"invalid line break character in string: \\u0085. If you want to use such character, use it in its escaped \\u0085 form instead"
1155+
],
1156+
<<34, "this is a ", 194, 133, 34>>
1157+
)
11341158
end
11351159

11361160
test "reserved tokens" do

0 commit comments

Comments
 (0)