Organizer
Mix.install([
{:explorer, "~> 0.7.0"},
{:poison, "~> 5.0"}
])
# Root directory of the org notes. The relative paths used later in this
# script (such as "roam/*") are resolved against it once it becomes the cwd.
org_path = "~/Projects/org"

# Expand the leading "~" and switch the working directory to the notes root.
# File.cd!/1 raises if the directory cannot be entered.
File.cd!(Path.expand(org_path))
Section
# Collect the non-directory entries directly under roam/.
# NOTE(review): the pattern discards the first entry and binds the second to
# `fname`, so `fnames` is missing two files, and the match raises when fewer
# than two entries exist. Confirm this is intentional and not leftover
# exploration.
[_, fname | fnames] =
  "roam/*"
  |> Path.wildcard()
  |> Enum.reject(&File.dir?/1)

fnames
defmodule OrgUtils do
  @moduledoc """
  Helpers for working with org note files: extracting a file's `:ID:` value
  and collecting the `[[id:...]]` links that point from one file to another.
  """

  @doc """
  Returns the value of the first line in `contents` that starts with `:ID:`,
  with surrounding whitespace removed, or `nil` when no such line exists.
  """
  def try_get_file_id(contents) do
    contents
    |> String.split("\n")
    |> Enum.find_value(fn
      # Only the leading marker is stripped; the rest of the line is the id.
      ":ID:" <> rest -> String.trim(rest)
      _other -> nil
    end)
  end

  @doc """
  Pairs every element of `enum` with `f.(element)`, dropping the elements for
  which `f` returns `nil`.

  `f` is called exactly once per element, so an expensive or side-effecting
  callback (such as reading a file) is not repeated.
  """
  def zip_with_nonnil_values(enum, f) do
    Enum.flat_map(enum, fn value ->
      case f.(value) do
        nil -> []
        result -> [{value, result}]
      end
    end)
  end

  @doc """
  Reads the file at `path` and returns its contents split on `"\\n"`.

  Raises if the file cannot be read.
  """
  def read_lines(path) do
    path
    |> File.read!()
    |> String.split("\n")
  end

  @doc """
  Returns the target ids of all `[[id:TARGET]]` / `[[id:TARGET][description]]`
  links found in `line`, in order of appearance. Returns `[]` when the line
  contains no such link.
  """
  def get_link_ids_from_line(line) do
    # Regex.scan/3 finds every link on the line; the capture group keeps only
    # the id, which ends at the first "]" after "[[id:".
    ~r/\[\[id:([^\]]*)\](?:\[.*?\])?\]/
    |> Regex.scan(line, capture: :all_but_first)
    |> Enum.map(fn [id] -> id end)
  end

  @doc """
  Builds a map from file name to the list of file names it links to.

  `ids_to_files` maps an id to the file that declares it. Each file in the
  map's values is read, its id links are resolved through `ids_to_files`
  (links to unknown ids are ignored), and files without any resolvable link
  are left out of the result.
  """
  def get_links(ids_to_files) do
    ids_to_files
    |> Map.values()
    |> Enum.map(fn fname -> {fname, linked_files(fname, ids_to_files)} end)
    |> Enum.reject(fn {_fname, linked} -> linked == [] end)
    |> Map.new()
  end

  @doc """
  Wraps a value in a single-element list; `nil` becomes the empty list.
  """
  def convert_value_to_singleton(nil), do: []
  def convert_value_to_singleton(other), do: [other]

  # Resolves every id link in the file `fname` to the file that declares that id.
  defp linked_files(fname, ids_to_files) do
    fname
    |> read_lines()
    |> Enum.flat_map(&get_link_ids_from_line/1)
    |> Enum.flat_map(fn id ->
      ids_to_files |> Map.get(id) |> convert_value_to_singleton()
    end)
  end
end
import OrgUtils
# Map each note file to the :ID: found in its contents; files without an
# :ID: line are dropped.
files_to_ids =
  fnames
  |> zip_with_nonnil_values(fn fname -> fname |> File.read!() |> try_get_file_id() end)
  |> Map.new()

# Inverse index: id => file that declares it.
ids_to_files = Map.new(files_to_ids, fn {fname, id} -> {id, fname} end)

# Spot check on a single note. The first link id (or nil when the note has
# none) is kept in `id_pat`; a strict one-element match here would abort the
# whole script whenever the note does not contain exactly one link.
lines = "roam/20230303100438-small_data_nlp.org" |> OrgUtils.read_lines()
id_pat = lines |> Enum.flat_map(&get_link_ids_from_line/1) |> List.first()
lines
# all_ids = ids_by_files |> Map.keys()

# file => list of files it links to (files without links are omitted).
files_to_links = get_links(ids_to_files)
files_to_links

# Number of files that link to at least one other file.
files_to_links |> Enum.count()

# Total number of links across all files.
files_to_links
|> Enum.map(fn {_, gp} -> gp |> Enum.count() end)
|> Enum.sum()
defmodule RoamPathUtils do
  @moduledoc """
  Breaks a note path into a flat, string-keyed record.
  """

  @doc """
  Splits `p` around its first run of digits and returns a map describing the
  pieces, with `key_prefix` prepended to every key.

  The pieces (text before the digits, the digits, text after) have leading
  and trailing `/` and `-` removed. Three pieces produce the keys
  `"root_path"`, `"date"`, `"name"` and `"path"`; two pieces produce `"date"`,
  `"name"` and `"path"`; anything else produces only `"path"`. `"path"` always
  holds the untouched input.

  The list of pieces is also printed to standard output.
  """
  def get_path_record(p, key_prefix \\ "") do
    pieces =
      Regex.split(~r"\d+", p, include_captures: true, parts: 2)
      |> Enum.map(&strip_separators/1)

    # Debug output of the split result, emitted on every call.
    IO.puts(inspect(pieces))

    pieces
    |> build_record(p)
    |> Map.new(fn {key, value} -> {key_prefix <> key, value} end)
  end

  # Removes surrounding "/" characters first, then surrounding "-" characters.
  defp strip_separators(piece) do
    piece
    |> String.trim("/")
    |> String.trim("-")
  end

  # Chooses the record layout from the number of pieces.
  defp build_record([root_path, date, name], p) do
    %{"root_path" => root_path, "date" => date, "name" => name, "path" => p}
  end

  defp build_record([date, name], p) do
    %{"date" => date, "name" => name, "path" => p}
  end

  defp build_record(_other, p) do
    %{"path" => p}
  end
end
# One flat record per (source file, linked file) pair: the source's path
# record under "source_*" keys merged with the destination's under
# "destination_*" keys.
roam_link_graph_records =
  Enum.flat_map(files_to_links, fn {source, destinations} ->
    Enum.map(destinations, fn dest ->
      Map.merge(
        RoamPathUtils.get_path_record(source, "source_"),
        RoamPathUtils.get_path_record(dest, "destination_")
      )
    end)
  end)

# Keep only records that have exactly 8 keys.
filtered_roam_link_graph_records =
  Enum.filter(roam_link_graph_records, fn rec -> map_size(rec) == 8 end)
# Write the filtered records as JSON to data/org_roam_records_2023_10_16.json
# under the parent of this file's directory. File.write!/2 raises on failure.
# NOTE(review): the JSON target is derived from __DIR__ and stamped 2023_10_16,
# while the Parquet target below is relative to the current working directory
# (changed by File.cd! near the top of the script) and stamped 2023_09_10 —
# confirm both locations and dates are intended.
__DIR__
|> Path.dirname()
|> Path.join("data/org_roam_records_2023_10_16.json")
|> File.write!(
filtered_roam_link_graph_records
|> Poison.encode!()
)
# Build a data frame from the same filtered records.
roam_links_df =
filtered_roam_link_graph_records
|> Explorer.DataFrame.new()
# Bare expression: evaluates to the current working directory; the value is not used here.
File.cwd!()
# Write the data frame to a Parquet file; the path is relative to the current working directory.
roam_links_df |> Explorer.DataFrame.to_parquet!("../data/org_roam_records_2023_09_10.parquet")