Docx with Pandoc via Panpipe
# Root of the text_server project: one level above the directory of this file.
text_server_root = Path.join(__DIR__, "..")

# Install the local project as this script's only dependency, pointing Mix at
# the project's own config and lockfile.
Mix.install(
  [{:text_server, path: text_server_root, env: :dev}],
  config_path: Path.join(text_server_root, "config/config.exs"),
  consolidate_protocols: false,
  lockfile: Path.join(text_server_root, "mix.lock")
)

# Re-enable and run the `compile.rambo` task so it executes even if Mix
# already considers it done.
# NOTE(review): presumably required because Panpipe shells out to pandoc
# through Rambo, which needs its native helper compiled — confirm.
Mix.Task.rerun("compile.rambo")
Parse the docx into a Pandoc AST
# Relative path of the uploaded .docx that is converted below.
docx =
  "priv/static/uploads/exemplar_files/GN_A Pausanias reader in progress, restarted 2020.05.01(1)-Gipson-6-18-2022-a16b87a2a69f98965106a63473335835.docx"

# Options handed to Panpipe.
# NOTE(review): presumably these map onto pandoc's `--track-changes=all` and
# `--extract-media=DIR` flags — confirm against the Panpipe docs.
pandoc_opts = [input: docx, track_changes: "all", extract_media: "tmp/media/pausanias"]

# Assertive match: anything other than {:ok, ast} raises a MatchError here.
{:ok, ast} = Panpipe.ast(pandoc_opts)
defmodule TextServer.Livebook.Panpipe do
  @moduledoc """
  Experimental helpers for rewriting a Panpipe (Pandoc) AST.

  `transform_ast/1` walks a `Panpipe.Document` and hands every node to
  `transform_node/1`, which currently:

    * replaces each `Panpipe.AST.Para` with a `TextNode` carrying the
      paragraph's children and a placeholder location of `[1]`;
    * recognises `{n.n.n}` location markers inside `Panpipe.AST.Str` nodes;
    * returns every other node unchanged.
  """

  # NOTE(review): set but not referenced anywhere in this module (the compiler
  # will warn about it). Kept because it looks like a placeholder for location
  # tracking — confirm whether it is still wanted.
  @current_location [0]

  # Matches a marker made of three dot-separated integers in braces, e.g. `{1.2.3}`.
  @location_regex ~r/\{\d+\.\d+\.\d+\}/

  defmodule TextNode do
    @moduledoc """
    Block-level AST node that holds inline children plus a `:location` field.
    """

    use Panpipe.AST.Node, type: :block, fields: [:location]

    # Children of a TextNode are inline nodes.
    def child_type(), do: :inline

    # Serialises the node as a Pandoc "Para" element with an additional
    # "location" key.
    # NOTE(review): "location" is not part of the standard Pandoc JSON shape
    # for Para — verify that whatever consumes this map tolerates it.
    def to_pandoc(%__MODULE__{children: children, location: location}) do
      %{
        "t" => "Para",
        "c" => Enum.map(children, &Panpipe.AST.Node.to_pandoc/1),
        "location" => location
      }
    end

    defimpl Enumerable do
      # Returning {:error, __MODULE__} makes Enum fall back to the
      # reduce/3-based default algorithms for these operations.
      def member?(_node, _), do: {:error, __MODULE__}
      def count(_node), do: {:error, __MODULE__}
      def slice(_node), do: {:error, __MODULE__}

      def reduce(_, {:halt, acc}, _fun), do: {:halted, acc}

      def reduce(node, {:suspend, acc}, fun) do
        {:suspended, acc, &reduce(node, &1, fun)}
      end

      # Visits the node itself first, then each child with its `parent` set to
      # this node. The tagged accumulator returned by `fun` is threaded through
      # the children as-is.
      def reduce(node, {:cont, acc}, fun) do
        node
        |> TextServer.Livebook.Panpipe.TextNode.children()
        |> Enum.reduce(fun.(node, acc), fn child, result ->
          Enumerable.reduce(%{child | parent: node}, result, fun)
        end)
      end
    end
  end

  @doc """
  Applies `transform_node/1` to the nodes of `ast` via `Panpipe.transform/2`
  and returns the transformed document.
  """
  def transform_ast(%Panpipe.Document{} = ast) do
    Panpipe.transform(ast, &transform_node/1)
  end

  @doc """
  Transformation callback passed to `Panpipe.transform/2`.

    * `Panpipe.AST.Emph` — returned unchanged.
    * `Panpipe.AST.Para` — replaced by a `TextNode` holding the paragraph's
      children, with `location` set to `[1]`.
    * `Panpipe.AST.Str` — returned unchanged when its string contains a
      `{n.n.n}` location marker, otherwise `nil`.
    * any other node — returned unchanged.
  """
  def transform_node(%Panpipe.AST.Emph{} = node) do
    node
  end

  def transform_node(%Panpipe.AST.Para{children: children}) do
    # Build the struct directly. The previous `Enum.into(%TextNode{}, map)`
    # had its arguments reversed (it tried to collect a TextNode into a plain
    # map) and could never yield a %TextNode{} for the struct update after it.
    %TextNode{children: children, location: [1]}
  end

  def transform_node(%Panpipe.AST.Str{string: string} = node) do
    case parse_location(string) do
      # No marker: nil is returned on purpose.
      # NOTE(review): Panpipe.transform/2 is documented to leave a node in
      # place when the callback returns nil — confirm for the installed version.
      nil ->
        nil

      # The parsed location is not consumed yet; the node passes through as-is.
      _location ->
        node
    end
  end

  def transform_node(node), do: node

  # Returns the segments of the first `{a.b.c}` marker in `string` as a list
  # of strings (e.g. ["1", "2", "3"]), or nil when there is no marker.
  defp parse_location(string) do
    case Regex.run(@location_regex, string) do
      [marker | _] ->
        marker
        |> String.trim_leading("{")
        |> String.trim_trailing("}")
        |> String.split(".")

      nil ->
        nil
    end
  end
end
# Apply the transformation defined in TextServer.Livebook.Panpipe to the AST
# parsed from the docx above.
TextServer.Livebook.Panpipe.transform_ast(ast)