Day 9: Disk Fragmenter

day9_disk_fragmenter.livemd

Lukas Knuth

@LukasKnuth

aoc_24

Share to X

Share to Bluesky

More notebooks

Day 9: Disk Fragmenter

Mix.install([
  {:kino, "~> 0.14.0"}
])

Reading

We’re compressing somebodies hand-crafted filesystem
Input a “disk map” that continas the information about data on the disk
- each digit indidactes alternatingly the size of a file
- OR the number of free space
- 12345 is a 1-block file, 2 block of free space, a 3-block file, 4 blocks of free space and a 5-block file
BEFORE re-arranging files we need to calculate the file-ID for each file
- starting at 0, counting from the start of the map, +1 for each file
We want to de-fragment the disk by
- moving each file BLOCK independently to the next free space
- the blocks of a single file can be fragmented/don’t have to be continuous

Input

# Useful for debugging
render_as_example = fn parsed_list ->
  Enum.reduce(parsed_list, "", fn
    {:file, id}, acc -> acc <> to_string(id)
    :free, acc -> acc <> "."
  end) 
  |> IO.inspect(label: "as example:")
end

disk_map =
  Kino.FS.file_path("day9_input.bin")
  |> File.read!()
  |> String.codepoints()
  |> Stream.reject(&amp;Kernel.==(&amp;1, "\n"))
  |> Stream.map(&amp;String.to_integer(&amp;1))
  |> Stream.zip(Stream.cycle([:file, :free]))
  |> Enum.reduce({0, []}, fn
    {0, _type}, {id, list} ->
      {id, list}
    
    {size, :file}, {id, list} ->
      list = Enum.reduce(1..size, list, fn _, list -> [{:file, id} | list] end)
      {id + 1, list}
      
    {size, :free}, {id, list} ->
      list = Enum.reduce(1..size, list, fn _, list -> [:free | list] end)
      {id, list}
  end)
  |> then(fn {_, list} -> Enum.reverse(list) end)

:ok

Implementation

We don’t need to keep the original order of blocks - as long as the block is correctly identified as being from the same file as before
Blocks belonging to a single file don’t have to be continious

Idea

parse the input to be an array of the full storage
- Use Tuple in Elixir, see if this can handle being used as an array
A block is {:block, ID} and space is :free

defmodule Filesystem do
  defstruct storage: {}
  def new(storage) when is_list(storage) do
    %__MODULE__{storage: List.to_tuple(storage)}
  end

  def size(%__MODULE__{storage: storage}), do: tuple_size(storage)

  def checksum(%__MODULE__{storage: storage}) do
    storage
    |> Tuple.to_list()
    |> Stream.with_index()
    |> Enum.reduce(0, fn
      {{:file, id}, index}, checksum -> checksum + (index * id)
      {:free, _index}, checksum -> checksum
    end)
  end

  def move_blocks_step(fs, nil), do: move_blocks_step(fs, {size(fs) - 1, 0})
  def move_blocks_step(%__MODULE__{storage: storage} = fs, {prev_file_idx, prev_free_idx}) do
    free_idx = first_free_idx(storage, prev_free_idx)
    {file_idx, file_id} = last_file_idx(storage, prev_file_idx)
    if free_idx >= file_idx do
      {:done, fs}
    else
      storage
      |> put_elem(free_idx, {:file, file_id})
      |> put_elem(file_idx, :free)
      |> then(fn storage ->
        fs = %__MODULE__{fs | storage: storage}
        {:ok, fs, {file_idx, free_idx}}
      end)
    end
  end

  defp last_file_idx(storage, prev_idx) do
    Enum.find(prev_idx..0//-1, fn index ->
      match?({:file, _id}, elem(storage, index))
    end)
    |> then(fn index when is_number(index) ->
      {index, storage |> elem(index) |> elem(1)}
    end)
  end

  defp first_free_idx(storage, prev_idx) do
    Enum.find(prev_idx..tuple_size(storage) - 1, fn index ->
      match?(:free, elem(storage, index))
    end)
  end
end

Stream.repeatedly(fn -> :next_step end)
|> Enum.reduce_while({Filesystem.new(disk_map), nil}, fn _, {fs, pos} ->
  case Filesystem.move_blocks_step(fs, pos) do
    {:done, fs} -> {:halt, fs}
    {:ok, fs, pos} -> {:cont, {fs, pos}}
  end
end)
|> Filesystem.checksum()

Optimization

The un-optimized version always searched from the left/right most block inward, re-visiting blocks on the outer edges many times.

Since the algorithm leaves no gaps, we can safely ignore all blocks we previously visited. We just retain the position and start from there on the next iteration.

All times measured on my MacBook Pro M2

Always search from outer bounds: 15.9sec
Search from previous pos: 3.7sec

Part 2

The moving of blocks above has fragmented most files accross the filesystem
This makes accessing them slow
We change the algorithm above to move whole files where possible
We only move each file once
In order of decreasing file-ID (right to left)
If there is no space to move the file to, don’t move it

defmodule MoveWithoutFragmenting do
  def move_blocks_step(fs, nil, nil) do
    boundary = Filesystem.size(fs)
    file_start_pos = {boundary, boundary}
    move_blocks_step(fs, MapSet.new(), file_start_pos)
  end
  
  def move_blocks_step(%Filesystem{storage: storage} = fs, prev_moved, prev_file_pos) do
    with {file_id, file_pos, prev_moved} <- last_file(storage, prev_moved, prev_file_pos) do
      free_pos = first_free(storage, block_size(file_pos))
      cond do
        free_pos == :not_found ->
          # No free space found, skip file
          {:ok, fs, prev_moved, file_pos}
        
        is_after?(free_pos, file_pos) ->
          # Won't move files backwards, skip
          {:ok, fs, prev_moved, file_pos}
        
        true ->
          storage
          |> swap_range(file_id, file_pos, free_pos)
          |> then(fn storage ->
            fs = %Filesystem{fs | storage: storage}
            {:ok, fs, prev_moved, file_pos}
          end)
      end
    else
      :not_found ->
        # No files are left in the filesystem
        {:done, fs}
    end
  end

  def is_after?({a_start, _}, {_, b_stop}), do: a_start > b_stop
  
  def block_size({start, stop}), do: stop - start

  defp swap_range(storage, file_id, {file_start, file_stop}, {free_start, free_stop}) do
    file_size = file_stop - file_start
    free_size = free_stop - free_start
    if file_size != free_size do
      IO.inspect({file_start, file_stop, file_size}, label: "file")
      IO.inspect({free_start, free_stop, free_size}, label: "free")
      raise "ASSERT file and free space differ!"
    end
    
    Enum.reduce(file_start..file_stop, storage, fn idx, storage ->
      put_elem(storage, idx, :free)
    end)
    |> then(&amp;Enum.reduce(free_start..free_stop, &amp;1, fn idx, storage ->
      put_elem(storage, idx, {:file, file_id})
    end))
  end

  defp last_file(storage, prev_moved, {prev_idx, _}) do
    Enum.reduce_while((prev_idx - 1)..0//-1, :not_found, fn idx, acc -> 
      case {acc, elem(storage, idx)} do
        {:not_found, :free} ->
          {:cont, :not_found}
        {:not_found, {:file, id}} ->
          if MapSet.member?(prev_moved, id) do
            # Already moved this file before
            {:cont, :not_found}
          else
            {:cont, {id, {idx, idx}}}
          end
        {{id, {_, stop}}, {:file, found}} when id == found ->
          {:cont, {id, {idx, stop}}}
        {{id, pos}, {:file, _other}} ->
          {:halt, {id, pos}}
        {{id, pos}, :free} ->
          {:halt, {id, pos}}
      end
    end)
    |> then(fn
      {id, pos} -> {id, pos, MapSet.put(prev_moved, id)}
      res -> res
    end)
  end

  defp first_free(storage, size) do
    Enum.reduce_while(0..tuple_size(storage) - 1, :not_found, fn idx, acc ->
      case {acc, elem(storage, idx)} do
        {:not_found, {:file, _id}} -> {:cont, :not_found}
        {:not_found, :free} -> {:cont, {idx, idx}}
        {{start, _}, :free} -> {:cont, {start, idx}}
        {{start, stop}, {:file, _id}} when stop - start >= size -> {:halt, {start, start + size}}
        {_pos, {:file, _id}} -> {:cont, :not_found}
      end
    end)
    |> then(fn
      {start, stop} when stop - start < size -> :not_found
      res -> res
    end)
  end
end

Stream.repeatedly(fn -> :next_step end)
|> Enum.reduce_while({Filesystem.new(disk_map), nil, nil}, fn _, {fs, history, file_pos} ->
  case MoveWithoutFragmenting.move_blocks_step(fs, history, file_pos) do
    {:done, fs} -> {:halt, fs}
    {:ok, fs, history, file_pos} -> {:cont, {fs, history, file_pos}}
  end
end)
|> Filesystem.checksum()

Optimization

It could be improved by collecting all file IDs that have been moved in a MapSet. If the same file is then later found again, we don’t even look for free space and just skip it immediately.

unoptimized (MacBook Pro M2): 12sec
optimized (MacBook Pro M2): 8.6sec

The 28% performance increase comes at the cost of making the code a good bit uglier and less focused. Check this files history in Git to see the less ugly version :)

Other notebooks:

Yejun Su
@goofansu

ogp

ogp

ogp.livemd

tutorial intermediate ogp kino

2022-8-18
@TomBers

livebookNotes

Attractors

attractors.livemd

advanced data-science decimal vega_lite kino

2022-8-18
Kevin Pan
@feng19

spider_man

ElixirJobs

elixirjobs.livemd

tutorial advanced spider_man floki nimble_csv kino

2022-8-18
@TomBers

livebookNotes

Fun with Graphs

graphs.livemd

tutorial advanced intermediate vega_lite kino math

2022-8-18
Numerical Elixir (Nx)
@elixir-nx

axon

Training an Autoencoder on Fashion MNIST

fashionmnist_autoencoder.livemd

tutorial advanced data-science axon exla scidata nx

2022-8-18
Byron Saltysiak
@byronsalty

fastbook-elixir

Bonus: Playground

bonus_playground.livemd

advanced data-science jason kino kino_vega_lite image nx nx_image bumblebee explorer kino_explorer exla fastbook_elixir httpoison poison axon

2024-2-24
Tyler Johnson
@tylerj

aoc

Day 20 - Advent of Code 2023

day-20.livemd

tutorial advanced testing debugging kino benchee math

2023-12-25

Back