Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Day 9: Disk Fragmenter

day9_disk_fragmenter.livemd

Day 9: Disk Fragmenter

Mix.install([
  {:kino, "~> 0.14.0"}
])

Reading

  • We’re compressing somebodies hand-crafted filesystem
  • Input a “disk map” that continas the information about data on the disk
    • each digit indidactes alternatingly the size of a file
    • OR the number of free space
    • 12345 is a 1-block file, 2 block of free space, a 3-block file, 4 blocks of free space and a 5-block file
  • BEFORE re-arranging files we need to calculate the file-ID for each file
    • starting at 0, counting from the start of the map, +1 for each file
  • We want to de-fragment the disk by
    • moving each file BLOCK independently to the next free space
    • the blocks of a single file can be fragmented/don’t have to be continuous

Input

# Useful for debugging
render_as_example = fn parsed_list ->
  Enum.reduce(parsed_list, "", fn
    {:file, id}, acc -> acc <> to_string(id)
    :free, acc -> acc <> "."
  end) 
  |> IO.inspect(label: "as example:")
end

disk_map =
  Kino.FS.file_path("day9_input.bin")
  |> File.read!()
  |> String.codepoints()
  |> Stream.reject(&amp;Kernel.==(&amp;1, "\n"))
  |> Stream.map(&amp;String.to_integer(&amp;1))
  |> Stream.zip(Stream.cycle([:file, :free]))
  |> Enum.reduce({0, []}, fn
    {0, _type}, {id, list} ->
      {id, list}
    
    {size, :file}, {id, list} ->
      list = Enum.reduce(1..size, list, fn _, list -> [{:file, id} | list] end)
      {id + 1, list}
      
    {size, :free}, {id, list} ->
      list = Enum.reduce(1..size, list, fn _, list -> [:free | list] end)
      {id, list}
  end)
  |> then(fn {_, list} -> Enum.reverse(list) end)

:ok

Implementation

  • We don’t need to keep the original order of blocks - as long as the block is correctly identified as being from the same file as before
  • Blocks belonging to a single file don’t have to be continious

Idea

  • parse the input to be an array of the full storage
    • Use Tuple in Elixir, see if this can handle being used as an array
  • A block is {:block, ID} and space is :free
defmodule Filesystem do
  defstruct storage: {}
  def new(storage) when is_list(storage) do
    %__MODULE__{storage: List.to_tuple(storage)}
  end

  def size(%__MODULE__{storage: storage}), do: tuple_size(storage)

  def checksum(%__MODULE__{storage: storage}) do
    storage
    |> Tuple.to_list()
    |> Stream.with_index()
    |> Enum.reduce(0, fn
      {{:file, id}, index}, checksum -> checksum + (index * id)
      {:free, _index}, checksum -> checksum
    end)
  end

  def move_blocks_step(fs, nil), do: move_blocks_step(fs, {size(fs) - 1, 0})
  def move_blocks_step(%__MODULE__{storage: storage} = fs, {prev_file_idx, prev_free_idx}) do
    free_idx = first_free_idx(storage, prev_free_idx)
    {file_idx, file_id} = last_file_idx(storage, prev_file_idx)
    if free_idx >= file_idx do
      {:done, fs}
    else
      storage
      |> put_elem(free_idx, {:file, file_id})
      |> put_elem(file_idx, :free)
      |> then(fn storage ->
        fs = %__MODULE__{fs | storage: storage}
        {:ok, fs, {file_idx, free_idx}}
      end)
    end
  end

  defp last_file_idx(storage, prev_idx) do
    Enum.find(prev_idx..0//-1, fn index ->
      match?({:file, _id}, elem(storage, index))
    end)
    |> then(fn index when is_number(index) ->
      {index, storage |> elem(index) |> elem(1)}
    end)
  end

  defp first_free_idx(storage, prev_idx) do
    Enum.find(prev_idx..tuple_size(storage) - 1, fn index ->
      match?(:free, elem(storage, index))
    end)
  end
end

Stream.repeatedly(fn -> :next_step end)
|> Enum.reduce_while({Filesystem.new(disk_map), nil}, fn _, {fs, pos} ->
  case Filesystem.move_blocks_step(fs, pos) do
    {:done, fs} -> {:halt, fs}
    {:ok, fs, pos} -> {:cont, {fs, pos}}
  end
end)
|> Filesystem.checksum()

Optimization

The un-optimized version always searched from the left/right most block inward, re-visiting blocks on the outer edges many times.

Since the algorithm leaves no gaps, we can safely ignore all blocks we previously visited. We just retain the position and start from there on the next iteration.


All times measured on my MacBook Pro M2

  • Always search from outer bounds: 15.9sec
  • Search from previous pos: 3.7sec

Part 2

  • The moving of blocks above has fragmented most files accross the filesystem
  • This makes accessing them slow
  • We change the algorithm above to move whole files where possible
  • We only move each file once
  • In order of decreasing file-ID (right to left)
  • If there is no space to move the file to, don’t move it
defmodule MoveWithoutFragmenting do
  def move_blocks_step(fs, nil, nil) do
    boundary = Filesystem.size(fs)
    file_start_pos = {boundary, boundary}
    move_blocks_step(fs, MapSet.new(), file_start_pos)
  end
  
  def move_blocks_step(%Filesystem{storage: storage} = fs, prev_moved, prev_file_pos) do
    with {file_id, file_pos, prev_moved} <- last_file(storage, prev_moved, prev_file_pos) do
      free_pos = first_free(storage, block_size(file_pos))
      cond do
        free_pos == :not_found ->
          # No free space found, skip file
          {:ok, fs, prev_moved, file_pos}
        
        is_after?(free_pos, file_pos) ->
          # Won't move files backwards, skip
          {:ok, fs, prev_moved, file_pos}
        
        true ->
          storage
          |> swap_range(file_id, file_pos, free_pos)
          |> then(fn storage ->
            fs = %Filesystem{fs | storage: storage}
            {:ok, fs, prev_moved, file_pos}
          end)
      end
    else
      :not_found ->
        # No files are left in the filesystem
        {:done, fs}
    end
  end

  def is_after?({a_start, _}, {_, b_stop}), do: a_start > b_stop
  
  def block_size({start, stop}), do: stop - start

  defp swap_range(storage, file_id, {file_start, file_stop}, {free_start, free_stop}) do
    file_size = file_stop - file_start
    free_size = free_stop - free_start
    if file_size != free_size do
      IO.inspect({file_start, file_stop, file_size}, label: "file")
      IO.inspect({free_start, free_stop, free_size}, label: "free")
      raise "ASSERT file and free space differ!"
    end
    
    Enum.reduce(file_start..file_stop, storage, fn idx, storage ->
      put_elem(storage, idx, :free)
    end)
    |> then(&amp;Enum.reduce(free_start..free_stop, &amp;1, fn idx, storage ->
      put_elem(storage, idx, {:file, file_id})
    end))
  end

  defp last_file(storage, prev_moved, {prev_idx, _}) do
    Enum.reduce_while((prev_idx - 1)..0//-1, :not_found, fn idx, acc -> 
      case {acc, elem(storage, idx)} do
        {:not_found, :free} ->
          {:cont, :not_found}
        {:not_found, {:file, id}} ->
          if MapSet.member?(prev_moved, id) do
            # Already moved this file before
            {:cont, :not_found}
          else
            {:cont, {id, {idx, idx}}}
          end
        {{id, {_, stop}}, {:file, found}} when id == found ->
          {:cont, {id, {idx, stop}}}
        {{id, pos}, {:file, _other}} ->
          {:halt, {id, pos}}
        {{id, pos}, :free} ->
          {:halt, {id, pos}}
      end
    end)
    |> then(fn
      {id, pos} -> {id, pos, MapSet.put(prev_moved, id)}
      res -> res
    end)
  end

  defp first_free(storage, size) do
    Enum.reduce_while(0..tuple_size(storage) - 1, :not_found, fn idx, acc ->
      case {acc, elem(storage, idx)} do
        {:not_found, {:file, _id}} -> {:cont, :not_found}
        {:not_found, :free} -> {:cont, {idx, idx}}
        {{start, _}, :free} -> {:cont, {start, idx}}
        {{start, stop}, {:file, _id}} when stop - start >= size -> {:halt, {start, start + size}}
        {_pos, {:file, _id}} -> {:cont, :not_found}
      end
    end)
    |> then(fn
      {start, stop} when stop - start < size -> :not_found
      res -> res
    end)
  end
end

Stream.repeatedly(fn -> :next_step end)
|> Enum.reduce_while({Filesystem.new(disk_map), nil, nil}, fn _, {fs, history, file_pos} ->
  case MoveWithoutFragmenting.move_blocks_step(fs, history, file_pos) do
    {:done, fs} -> {:halt, fs}
    {:ok, fs, history, file_pos} -> {:cont, {fs, history, file_pos}}
  end
end)
|> Filesystem.checksum()

Optimization

It could be improved by collecting all file IDs that have been moved in a MapSet. If the same file is then later found again, we don’t even look for free space and just skip it immediately.

  • unoptimized (MacBook Pro M2): 12sec
  • optimized (MacBook Pro M2): 8.6sec

The 28% performance increase comes at the cost of making the code a good bit uglier and less focused. Check this files history in Git to see the less ugly version :)