Day 9: Disk Fragmenter
Mix.install([
{:kino, "~> 0.14.0"}
])
Reading
- We’re compressing somebodies hand-crafted filesystem
-
Input a “disk map” that continas the information about data on the disk
- each digit indidactes alternatingly the size of a file
- OR the number of free space
-
12345
is a 1-block file, 2 block of free space, a 3-block file, 4 blocks of free space and a 5-block file
-
BEFORE re-arranging files we need to calculate the file-ID for each file
-
starting at
0
, counting from the start of the map,+1
for each file
-
starting at
-
We want to de-fragment the disk by
- moving each file BLOCK independently to the next free space
- the blocks of a single file can be fragmented/don’t have to be continuous
Input
# Useful for debugging
render_as_example = fn parsed_list ->
Enum.reduce(parsed_list, "", fn
{:file, id}, acc -> acc <> to_string(id)
:free, acc -> acc <> "."
end)
|> IO.inspect(label: "as example:")
end
disk_map =
Kino.FS.file_path("day9_input.bin")
|> File.read!()
|> String.codepoints()
|> Stream.reject(&Kernel.==(&1, "\n"))
|> Stream.map(&String.to_integer(&1))
|> Stream.zip(Stream.cycle([:file, :free]))
|> Enum.reduce({0, []}, fn
{0, _type}, {id, list} ->
{id, list}
{size, :file}, {id, list} ->
list = Enum.reduce(1..size, list, fn _, list -> [{:file, id} | list] end)
{id + 1, list}
{size, :free}, {id, list} ->
list = Enum.reduce(1..size, list, fn _, list -> [:free | list] end)
{id, list}
end)
|> then(fn {_, list} -> Enum.reverse(list) end)
:ok
Implementation
- We don’t need to keep the original order of blocks - as long as the block is correctly identified as being from the same file as before
- Blocks belonging to a single file don’t have to be continious
Idea
-
parse the input to be an array of the full storage
- Use Tuple in Elixir, see if this can handle being used as an array
-
A block is
{:block, ID}
and space is:free
defmodule Filesystem do
defstruct storage: {}
def new(storage) when is_list(storage) do
%__MODULE__{storage: List.to_tuple(storage)}
end
def size(%__MODULE__{storage: storage}), do: tuple_size(storage)
def checksum(%__MODULE__{storage: storage}) do
storage
|> Tuple.to_list()
|> Stream.with_index()
|> Enum.reduce(0, fn
{{:file, id}, index}, checksum -> checksum + (index * id)
{:free, _index}, checksum -> checksum
end)
end
def move_blocks_step(fs, nil), do: move_blocks_step(fs, {size(fs) - 1, 0})
def move_blocks_step(%__MODULE__{storage: storage} = fs, {prev_file_idx, prev_free_idx}) do
free_idx = first_free_idx(storage, prev_free_idx)
{file_idx, file_id} = last_file_idx(storage, prev_file_idx)
if free_idx >= file_idx do
{:done, fs}
else
storage
|> put_elem(free_idx, {:file, file_id})
|> put_elem(file_idx, :free)
|> then(fn storage ->
fs = %__MODULE__{fs | storage: storage}
{:ok, fs, {file_idx, free_idx}}
end)
end
end
defp last_file_idx(storage, prev_idx) do
Enum.find(prev_idx..0//-1, fn index ->
match?({:file, _id}, elem(storage, index))
end)
|> then(fn index when is_number(index) ->
{index, storage |> elem(index) |> elem(1)}
end)
end
defp first_free_idx(storage, prev_idx) do
Enum.find(prev_idx..tuple_size(storage) - 1, fn index ->
match?(:free, elem(storage, index))
end)
end
end
Stream.repeatedly(fn -> :next_step end)
|> Enum.reduce_while({Filesystem.new(disk_map), nil}, fn _, {fs, pos} ->
case Filesystem.move_blocks_step(fs, pos) do
{:done, fs} -> {:halt, fs}
{:ok, fs, pos} -> {:cont, {fs, pos}}
end
end)
|> Filesystem.checksum()
Optimization
The un-optimized version always searched from the left/right most block inward, re-visiting blocks on the outer edges many times.
Since the algorithm leaves no gaps, we can safely ignore all blocks we previously visited. We just retain the position and start from there on the next iteration.
All times measured on my MacBook Pro M2
- Always search from outer bounds: 15.9sec
- Search from previous pos: 3.7sec
Part 2
- The moving of blocks above has fragmented most files accross the filesystem
- This makes accessing them slow
- We change the algorithm above to move whole files where possible
- We only move each file once
- In order of decreasing file-ID (right to left)
- If there is no space to move the file to, don’t move it
defmodule MoveWithoutFragmenting do
def move_blocks_step(fs, nil, nil) do
boundary = Filesystem.size(fs)
file_start_pos = {boundary, boundary}
move_blocks_step(fs, MapSet.new(), file_start_pos)
end
def move_blocks_step(%Filesystem{storage: storage} = fs, prev_moved, prev_file_pos) do
with {file_id, file_pos, prev_moved} <- last_file(storage, prev_moved, prev_file_pos) do
free_pos = first_free(storage, block_size(file_pos))
cond do
free_pos == :not_found ->
# No free space found, skip file
{:ok, fs, prev_moved, file_pos}
is_after?(free_pos, file_pos) ->
# Won't move files backwards, skip
{:ok, fs, prev_moved, file_pos}
true ->
storage
|> swap_range(file_id, file_pos, free_pos)
|> then(fn storage ->
fs = %Filesystem{fs | storage: storage}
{:ok, fs, prev_moved, file_pos}
end)
end
else
:not_found ->
# No files are left in the filesystem
{:done, fs}
end
end
def is_after?({a_start, _}, {_, b_stop}), do: a_start > b_stop
def block_size({start, stop}), do: stop - start
defp swap_range(storage, file_id, {file_start, file_stop}, {free_start, free_stop}) do
file_size = file_stop - file_start
free_size = free_stop - free_start
if file_size != free_size do
IO.inspect({file_start, file_stop, file_size}, label: "file")
IO.inspect({free_start, free_stop, free_size}, label: "free")
raise "ASSERT file and free space differ!"
end
Enum.reduce(file_start..file_stop, storage, fn idx, storage ->
put_elem(storage, idx, :free)
end)
|> then(&Enum.reduce(free_start..free_stop, &1, fn idx, storage ->
put_elem(storage, idx, {:file, file_id})
end))
end
defp last_file(storage, prev_moved, {prev_idx, _}) do
Enum.reduce_while((prev_idx - 1)..0//-1, :not_found, fn idx, acc ->
case {acc, elem(storage, idx)} do
{:not_found, :free} ->
{:cont, :not_found}
{:not_found, {:file, id}} ->
if MapSet.member?(prev_moved, id) do
# Already moved this file before
{:cont, :not_found}
else
{:cont, {id, {idx, idx}}}
end
{{id, {_, stop}}, {:file, found}} when id == found ->
{:cont, {id, {idx, stop}}}
{{id, pos}, {:file, _other}} ->
{:halt, {id, pos}}
{{id, pos}, :free} ->
{:halt, {id, pos}}
end
end)
|> then(fn
{id, pos} -> {id, pos, MapSet.put(prev_moved, id)}
res -> res
end)
end
defp first_free(storage, size) do
Enum.reduce_while(0..tuple_size(storage) - 1, :not_found, fn idx, acc ->
case {acc, elem(storage, idx)} do
{:not_found, {:file, _id}} -> {:cont, :not_found}
{:not_found, :free} -> {:cont, {idx, idx}}
{{start, _}, :free} -> {:cont, {start, idx}}
{{start, stop}, {:file, _id}} when stop - start >= size -> {:halt, {start, start + size}}
{_pos, {:file, _id}} -> {:cont, :not_found}
end
end)
|> then(fn
{start, stop} when stop - start < size -> :not_found
res -> res
end)
end
end
Stream.repeatedly(fn -> :next_step end)
|> Enum.reduce_while({Filesystem.new(disk_map), nil, nil}, fn _, {fs, history, file_pos} ->
case MoveWithoutFragmenting.move_blocks_step(fs, history, file_pos) do
{:done, fs} -> {:halt, fs}
{:ok, fs, history, file_pos} -> {:cont, {fs, history, file_pos}}
end
end)
|> Filesystem.checksum()
Optimization
It could be improved by collecting all file IDs that have been moved in a MapSet
. If the same file is then later found again, we don’t even look for free space and just skip it immediately.
- unoptimized (MacBook Pro M2): 12sec
- optimized (MacBook Pro M2): 8.6sec
The 28% performance increase comes at the cost of making the code a good bit uglier and less focused. Check this files history in Git to see the less ugly version :)