Skip to content

Commit bbe7001

Browse files
authored
Fix File.Stream Enumerable.count for files without trailing newline (#15147)
The optimized count implementation was counting newline characters instead of actual lines, returning incorrect results for files that don't end with a newline (which is common for source code files). For example, a file with content "line1\nline2\nline3" (3 lines) was returning 2 instead of 3. The fix tracks the last byte read and adds 1 at EOF if the file has content but doesn't end with a newline.
1 parent d61ba91 commit bbe7001

2 files changed

Lines changed: 41 additions & 6 deletions

File tree

lib/elixir/lib/file/stream.ex

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ defmodule File.Stream do
119119

120120
counter = fn device ->
121121
device = skip_bom_and_offset(device, raw, modes)
122-
count_lines(device, path, pattern, read_function(stream), 0)
122+
count_lines(device, path, pattern, read_function(stream), 0, :empty)
123123
end
124124

125125
{:ok, open!(stream, modes, counter)}
@@ -229,21 +229,28 @@ defmodule File.Stream do
229229
for mode <- modes, mode not in [:write, :append, :trim_bom], do: mode
230230
end
231231

232-
defp count_lines(device, path, pattern, read, count) do
232+
defp count_lines(device, path, pattern, read, count, last_byte) do
233233
case read.(device) do
234+
data when is_binary(data) and byte_size(data) > 0 ->
235+
newlines = length(:binary.matches(data, pattern))
236+
last = :binary.last(data)
237+
count_lines(device, path, pattern, read, count + newlines, last)
238+
234239
data when is_binary(data) ->
235-
count_lines(device, path, pattern, read, count + count_lines(data, pattern))
240+
count_lines(device, path, pattern, read, count, last_byte)
236241

237242
:eof ->
238-
count
243+
case last_byte do
244+
:empty -> 0
245+
?\n -> count
246+
_ -> count + 1
247+
end
239248

240249
{:error, reason} ->
241250
raise File.Error, reason: reason, action: "stream", path: path
242251
end
243252
end
244253

245-
defp count_lines(data, pattern), do: length(:binary.matches(data, pattern))
246-
247254
defp read_function(%{raw: true}), do: &IO.binread(&1, @read_ahead_size)
248255
defp read_function(%{raw: false}), do: &IO.read(&1, @read_ahead_size)
249256
end

lib/elixir/test/elixir/file/stream_test.exs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,34 @@ defmodule File.StreamTest do
6565
assert Enum.count(stream) == 2
6666
end
6767

68+
test "counts lines without trailing newline" do
69+
no_trailing = tmp_path("no_trailing.txt")
70+
single_line = tmp_path("single_line.txt")
71+
empty_file = tmp_path("empty.txt")
72+
73+
try do
74+
File.write!(no_trailing, "line1\nline2\nline3")
75+
File.write!(single_line, "hello")
76+
File.write!(empty_file, "")
77+
78+
# 3 lines, no trailing newline
79+
stream = stream!(@node, no_trailing)
80+
assert Enum.count(stream) == 3
81+
82+
# 1 line, no newline at all
83+
stream = stream!(@node, single_line)
84+
assert Enum.count(stream) == 1
85+
86+
# empty file
87+
stream = stream!(@node, empty_file)
88+
assert Enum.count(stream) == 0
89+
after
90+
File.rm(no_trailing)
91+
File.rm(single_line)
92+
File.rm(empty_file)
93+
end
94+
end
95+
6896
test "reads and writes lines" do
6997
src = fixture_path("file.txt")
7098
dest = tmp_path("tmp_test.txt")

0 commit comments

Comments
 (0)