Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Docx with Pandoc via Panpipe

docx_with_pandoc_via_panpipe.livemd

Docx with Pandoc via Panpipe

# Root of the text_server project: the parent of the directory containing this notebook.
text_server_root = Path.join(__DIR__, "..")

# Install the text_server project itself as a path dependency (in the :dev env),
# reusing the project's own config file and lockfile.
# Protocol consolidation is disabled; in general this lets protocol implementations
# defined at runtime (e.g. in notebook cells) be dispatched to.
Mix.install(
  [
    {:text_server, path: text_server_root, env: :dev}
  ],
  config_path: Path.join(text_server_root, "config/config.exs"),
  consolidate_protocols: false,
  lockfile: Path.join(text_server_root, "mix.lock")
)

# Force the `compile.rambo` Mix task to run again, even if it already ran during install.
# NOTE(review): presumably this builds the native helper of the Rambo dependency
# that is used to shell out to pandoc — confirm.
Mix.Task.reenable("compile.rambo")
Mix.Task.run("compile.rambo")

Parse the docx into a Pandoc AST

# Path to the uploaded .docx exemplar file to parse.
# NOTE(review): the path is relative, so it is resolved against the current working
# directory of the runtime — confirm this is the project root.
docx =
  "priv/static/uploads/exemplar_files/GN_A Pausanias reader in progress, restarted 2020.05.01(1)-Gipson-6-18-2022-a16b87a2a69f98965106a63473335835.docx"

# Parse the document into a Panpipe AST. The match on `{:ok, ast}` is assertive:
# any other return value raises a MatchError.
# NOTE(review): `track_changes` and `extract_media` are presumably forwarded to the
# pandoc options of the same name (--track-changes / --extract-media) — confirm.
{:ok, ast} = Panpipe.ast(input: docx, track_changes: "all", extract_media: "tmp/media/pausanias")
defmodule TextServer.Livebook.Panpipe do
  @moduledoc """
  Experimental transformation of a Panpipe AST for the text server.

  `transform_ast/1` walks a `Panpipe.Document` with `Panpipe.transform/2` and
  applies `transform_node/1` to each node. Paragraphs are replaced by
  `TextNode` structs that carry an additional `location` field; strings are
  scanned for location markers of the form `{1.2.3}`.
  """

  # Matches a location marker such as `{1.2.3}` anywhere inside a string.
  @location_regex ~r/\{\d+\.\d+\.\d+\}/

  defmodule TextNode do
    @moduledoc """
    A paragraph-like block node that additionally carries a `location`.

    It serializes to a Pandoc `Para` element with an extra `"location"` key.
    """

    use Panpipe.AST.Node, type: :block, fields: [:location]

    @doc "Children of a `TextNode` are inline nodes."
    def child_type(), do: :inline

    @doc """
    Converts the node to a Pandoc-style map: a `"Para"` whose content is the
    converted children, plus the node's `location` under `"location"`.
    """
    def to_pandoc(%__MODULE__{children: children, location: location}) do
      %{
        "t" => "Para",
        "c" => Enum.map(children, &Panpipe.AST.Node.to_pandoc/1),
        "location" => location
      }
    end

    defimpl Enumerable do
      # Returning `{:error, __MODULE__}` tells `Enum` to fall back to its
      # default, reduce-based implementation of these operations.
      def member?(_node, _), do: {:error, __MODULE__}
      def count(_node), do: {:error, __MODULE__}
      def slice(_node), do: {:error, __MODULE__}

      def reduce(_, {:halt, acc}, _fun), do: {:halted, acc}

      def reduce(node, {:suspend, acc}, fun) do
        {:suspended, acc, &reduce(node, &1, fun)}
      end

      # Pre-order traversal: the reducer is applied to the node itself first,
      # then to each child, with the child's `parent` set to this node.
      def reduce(node, {:cont, acc}, fun) do
        TextServer.Livebook.Panpipe.TextNode.children(node)
        |> Enum.reduce(fun.(node, acc), fn child, result ->
          Enumerable.reduce(%{child | parent: node}, result, fun)
        end)
      end
    end
  end

  @doc """
  Applies `transform_node/1` to every node of the given `Panpipe.Document`
  via `Panpipe.transform/2` and returns the result of that call.
  """
  def transform_ast(%Panpipe.Document{} = ast) do
    Panpipe.transform(ast, &transform_node/1)
  end

  @doc """
  Transforms a single AST node.

    * `Panpipe.AST.Emph` - returned unchanged.
    * `Panpipe.AST.Para` - replaced by a `TextNode` holding the paragraph's
      children and a placeholder location of `[1]`.
    * `Panpipe.AST.Str` - returned unchanged when its string contains a
      location marker such as `{1.2.3}`; otherwise `nil` is returned.
    * any other node - returned unchanged.

  NOTE(review): a `nil` result is presumably interpreted by
  `Panpipe.transform/2` as "keep the node as it is" - confirm against the
  Panpipe documentation.
  """
  def transform_node(%Panpipe.AST.Emph{} = node), do: node

  def transform_node(%Panpipe.AST.Para{children: children}) do
    # Build the struct directly. The location is a hard-coded placeholder:
    # every paragraph currently receives the same value.
    %TextNode{children: children, location: [1]}
  end

  def transform_node(%Panpipe.AST.Str{string: string} = node) do
    case parse_location(string) do
      # No marker in this string.
      nil -> nil
      # The parsed location is not used yet; the node is passed through as is.
      _location -> node
    end
  end

  def transform_node(node), do: node

  # Extracts the first location marker from `string` and returns its
  # dot-separated parts as strings (e.g. "{1.2.3}" -> ["1", "2", "3"]),
  # or `nil` when the string contains no marker.
  defp parse_location(string) do
    case Regex.run(@location_regex, string) do
      [marker | _] ->
        marker
        |> String.trim_leading("{")
        |> String.trim_trailing("}")
        |> String.split(".")

      nil ->
        nil
    end
  end
end
# Run the transformation over the AST parsed above; as the last expression of the
# cell, its result is what the notebook displays.
TextServer.Livebook.Panpipe.transform_ast(ast)