Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Scraper Lite

docs/scraper.livemd

Scraper Lite

# Mix.install([
#   {:poison, "~> 5.0"},
#   {:floki, "~> 0.35.2"},
#   {:req, "~> 0.4.8"},
#   {:youtube_captions, "~> 0.1.0"}
# ])

Root

# defmodule R do
#   def recompile() do
#     Mix.Task.reenable("app.start")
#     Mix.Task.reenable("compile")
#     Mix.Task.reenable("compile.all")
#     compilers = Mix.compilers()
#     Enum.each(compilers, &Mix.Task.reenable("compile.#{&1}"))
#     Mix.Task.run("compile.all")
#   end
# end

# R.recompile()
# {:ok, src} = Vyasa.Written.create_source(%{title: "rama"})
# Vyasa.Medium.Writer.init(%)

SAAVN

# Album endpoint on saavn.dev, queried with the JioSaavn album link.
link =
  "https://saavn.dev/api/albums?link=https://www.jiosaavn.com/album/thiruvasagam-vol-1-to-6/M2txfVKHMQ0_"

# Raw Finch response for the album request (raises on transport failure).
p = :get |> Finch.build(link) |> Finch.request!(Vyasa.Finch)
defmodule Vyasa.Stream do
  @moduledoc """
  Streams remote files to disk through the `Vyasa.Finch` pool.

  The Finch stream accumulator doubles as a small state machine:

    * the integer status right after the `:status` message,
    * `{"location", url}` once the headers of a redirect have been seen,
    * `{processed_bytes, total_bytes}` while a 200 body is being written.
  """

  @redirect_statuses [301, 302, 303, 307, 308]

  @doc """
  Builds a lazy stream that downloads every `{url, path}` pair concurrently.

  Nothing is downloaded until the returned stream is consumed
  (for example with `Stream.run/1`).
  """
  def stream([{_url, _path} | _] = data) do
    Task.async_stream(data, fn {url, path} -> download(url, path) end, timeout: :infinity)
  end

  @doc """
  Downloads `url` into `file_path`, following redirects.

  Returns `{:halt, {file_path, :done}}` when the body was written and the file
  closed cleanly, and `{:halt, :error}` for any other outcome (bad status,
  write failure, transport error). Raises if the file cannot be opened.
  """
  def download(url, file_path) do
    IO.puts("Starting to process #{inspect(file_path)}...........")

    # Open a file to which binary chunks will be appended to.
    # File.open!/2 raises on failure, so the pid needs no further checking.
    file_pid = File.open!(file_path, [:write, :binary])

    outcome =
      Finch.build(:get, url)
      |> Finch.stream_while(Vyasa.Finch, nil, fn
        # we put the status in the "acc" to handle redirections
        {:status, status}, _acc ->
          {:cont, status}

        # - redirect: the "location" header tuple goes in the "acc"
        # - 200: the byte counters go in the "acc"
        {:headers, headers}, acc ->
          handle_headers(headers, acc)

        # redirect: recurse; otherwise write the chunk and report progress
        {:data, data}, acc ->
          handle_data(data, acc, file_path, file_pid)

        # any other stream message (e.g. trailers) carries nothing we need
        _other, acc ->
          {:cont, acc}
      end)

    closed = File.close(file_pid)

    case outcome do
      # Body fully streamed into the file.
      {:ok, {processed, _size}} when is_integer(processed) and closed == :ok ->
        {:halt, {file_path, :done}}

      # A redirect was followed from handle_data/4 and that download succeeded.
      {:ok, {^file_path, :done}} ->
        {:halt, {file_path, :done}}

      # Redirect response without a body: no :data message arrived, follow it now.
      {:ok, {"location", location}} ->
        download(location, file_path)

      _failed ->
        {:halt, :error}
    end
  end

  @doc """
  Decides how to continue after the response headers, based on the status
  stored in the accumulator.

    * redirect status - continues with the `{"location", url}` header tuple,
      or halts with `:bad_status` when the header is missing;
    * `200` - continues with `{0, content_length}`; the length is `0` when the
      header is absent or not a positive integer;
    * anything else - halts with `:bad_status`.
  """
  def handle_headers(headers, status) when status in @redirect_statuses do
    IO.puts("REDIR: #{status}")

    case List.keyfind(headers, "location", 0) do
      nil -> {:halt, :bad_status}
      location -> {:cont, location}
    end
  end

  def handle_headers(headers, 200) do
    # The header is optional (e.g. chunked responses); treat "unknown" as 0
    # instead of crashing on a failed match.
    size =
      case List.keyfind(headers, "content-length", 0) do
        {"content-length", value} -> parse_size(value)
        nil -> 0
      end

    {:cont, {0, size}}
  end

  def handle_headers(_, status) do
    dbg(status)
    {:halt, :bad_status}
  end

  @doc """
  Handles one body chunk.

  With a `{"location", url}` accumulator the current file is closed and the
  download restarts at `url`; the nested download's result halts this stream.
  Otherwise the chunk is appended to `file_pid`, progress is printed when the
  total size is known, and the byte counter advances. A write failure halts
  the stream with `{:error, reason}` as accumulator.
  """
  def handle_data(_data, {"location", location}, file_path, file_pid) do
    if Process.alive?(file_pid), do: :ok = File.close(file_pid)

    # recursion
    download(location, file_path)
  end

  def handle_data(data, {processed, size}, file_path, file_pid) do
    case IO.binwrite(file_pid, data) do
      :ok ->
        processed = processed + byte_size(data)

        if is_integer(size) and size > 0 do
          IO.inspect(Float.round(processed * 100 / size, 1),
            label: "Processed #{inspect(file_path)} %: "
          )
        end

        {:cont, {processed, size}}

      {:error, reason} ->
        # The stream callback must answer {:cont, acc} or {:halt, acc}.
        {:halt, {:error, reason}}
    end
  end

  # Parses a content-length value, falling back to 0 when it is not a positive integer.
  defp parse_size(value) do
    case Integer.parse(value) do
      {size, _rest} when size > 0 -> size
      _ -> 0
    end
  end
end
path = "#{File.cwd!()}/media/thiruvasagam/"
# Vyasa.Stream.download/2 opens each target with File.open!/2, which raises
# when the directory is missing, so create it before any download starts.
File.mkdir_p!(path)
Application.ensure_all_started(:inets)

# Turn every song of the album response into a {url, target_path} pair and
# download them all concurrently.
data =
  p
  |> Map.get(:body)
  |> Jason.decode!()
  |> Map.get("data")
  |> Map.get("songs")
  # |> List.first()
  |> Enum.map(fn %{"downloadUrl" => urls, "name" => name} ->
    # The last entry of "downloadUrl" is used (presumably the highest
    # quality variant - confirm against the API).
    url = urls |> List.last() |> Map.get("url")
    {url, path <> name}
  end)
  # |> Task.async_stream((fn {url, name} ->
  #   Vyasa.Stream.download(url, path <> name)
  # end), timeout: :infinity)
  |> Vyasa.Stream.stream()
  |> Stream.run()

GUPOPE

link = "https://www.projectmadurai.org/pm_etexts/utf8/pmuni0094.html"

# Fetch the page, parse it and keep only the <body> node(s); the parsed tree
# is printed in full for inspection.
p =
  Finch.build(:get, link)
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> tap(&IO.inspect(&1, limit: :infinity))

# [{"body", [], body , something}] =p
b = p |> List.first()

# Asserts that the body carries no attributes and binds its children.
{"body", [], body} = b

# Walk the <body> children with a {chapter_index, chapters, window} state:
# every <h3> opens a new chapter and resets the window; bare text lines are
# pushed onto the window as {verse_number | nil, text}, newest first.
ts =
  body
  |> Enum.reduce({-1, %{}, []}, fn
    # Clear window and start new chapter
    {"h3", _, chapter}, {curr, acc, win} when is_list(chapter) ->
      IO.inspect(win, label: "remaining window")
      {curr + 1, Map.put(acc, curr + 1, %{chapter: List.last(chapter), verse: []}), []}

    {"h3", _, chapter}, {curr, acc, win} ->
      IO.inspect(win, label: "remaining window")
      {curr + 1, Map.put(acc, curr + 1, %{chapter: chapter}), []}

    # New value in topic, add appropriate topic_map to topic_maps in accumulator
    {"center", [], [{"h3", [], _list}]}, acc ->
      acc

    {_, _, _}, acc ->
      # other translations not today
      acc

    line, {curr, acc, win} when is_binary(line) ->
      IO.inspect(line, label: "caught")

      newin =
        case Regex.run(~r/\s*(\(?\d+\)?)\s*$/, line) do
          [_, verse_number] ->
            verse_number =
              verse_number
              |> String.replace(["(", ")"], "")
              |> String.trim()

            # Strip only the trailing verse marker; the pattern is anchored so
            # digits elsewhere in the line are left untouched.
            f_l =
              line
              |> String.replace(~r/\s*\(?\d+\)?\s*$/, "")
              |> String.trim()

            # Allocate the verse to the corresponding number
            [{String.to_integer(verse_number), f_l} | win]

          _ ->
            # If no verse number, just accumulate the line
            [{nil, line} | win]
        end

      {curr, acc, newin}

    # Anything that is neither an element nor text (e.g. a comment node) is skipped.
    _other, acc ->
      acc
  end)
{last, map, win} = ts

# Fold the window (oldest line first, terminated by :END) into a map of
# verse_number => list of text lines. State is {last_verse_number,
# pending_unnumbered_lines, verses}.
{count, _, v} =
  [:END | win]
  |> Enum.reverse()
  |> Enum.reduce({0, [], %{}}, fn
    # A numbered line: first hand the pending unnumbered lines to the verse
    # numbers between the previous numbered verse and this one.
    # NOTE(review): the pending lines are walked oldest-first while the verse
    # number counts down - confirm this is the intended assignment order.
    {verse_number, text}, {curr, rem, acc} when is_number(verse_number) ->
      {_, verses} =
        rem
        |> Enum.reverse()
        |> Enum.reduce_while({verse_number, acc}, fn
          x, {count, acc} when count > curr ->
            # Lines made only of capitals/punctuation are skipped.
            if Regex.match?(~r/^[A-Z\s'!@#$%^&*()_+={}\[\]:;"'<>,.?\/\\-]+$/, x) do
              {:cont, {count, acc}}
            else
              {:cont, {count - 1, Map.update(acc, count - 1, [x], &[x | &1])}}
            end

          # Ran out of free verse numbers: stop, keeping the {count, acc}
          # shape that the match above expects.
          _x, state ->
            {:halt, state}
        end)

      # Values are always lists, so repeated verse numbers prepend cleanly.
      {verse_number, [], Map.update(verses, verse_number, [text], &[text | &1])}

    # Keep at most five pending lines: drop the oldest before adding a new one.
    {nil, text}, {curr, rem, acc} when length(rem) > 4 ->
      {curr, [text | Enum.drop(rem, -1)], acc}

    {nil, text}, {curr, rem, acc} ->
      {curr, [text | rem], acc}

    # End of input: number the remaining pending lines after the last verse.
    :END, {curr, rem, acc} ->
      {_, terminal} =
        rem
        |> Enum.reverse()
        |> Enum.reduce({curr, acc}, fn x, {count, acc} ->
          {count + 1, Map.update(acc, count + 1, [x], &[x | &1])}
        end)

      {curr, [], terminal}
  end)

v

# |> Enum.map(fn {verse_number, texts} ->
#       {verse_number, Enum.reverse(texts)}
#     end)

Thiruvaasagam

url = "https://www.sivaya.org/"
# fetch from root frame
path = "thiruvaasagam_complete.php"
doc = "?lang=english"

# Fetch the English index page and keep its <body>; the parsed tree is
# printed in full for inspection.
col =
  Finch.build(:get, url <> path <> doc)
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> tap(&IO.inspect(&1, limit: :infinity))
defmodule ThiruvaasagamScraper do
  @moduledoc """
  Scrapes the Thiruvaasagam index and verse pages from sivaya.org.
  """

  @base_url "https://www.sivaya.org/thiruvaasagam_complete.php"

  @doc """
  Fetches the index page for `lang` and returns `%{chapters: chapters}`.

  Each chapter is a map with `:number`, `:title` and `:url`, where `:url` is
  the list returned by `Floki.attribute/2` for the link's `href`.
  """
  def scrape(lang) do
    {:ok, html} =
      Finch.build(:get, @base_url <> "?lang=" <> lang)
      |> Finch.request!(Vyasa.Finch)
      |> Map.get(:body)
      |> Floki.parse_document()

    %{chapters: extract_chapters(html, lang)}
  end

  @doc """
  Builds one chapter map for every link whose `href` starts with
  `thirumurai_song.php?&pathigam_no=8.`.
  """
  def extract_chapters(html, lang) do
    html
    |> Floki.find("a[href^='thirumurai_song.php?&pathigam_no=8.']")
    |> Enum.map(&extract_chapter(&1, lang))
  end

  # `lang` is only needed by the (currently disabled) verse extraction.
  defp extract_chapter(chapter_link, _lang) do
    href = Floki.attribute(chapter_link, "href")

    chapter_number =
      href
      |> List.first()
      |> extract_chapter_number()

    # verses = extract_verses(chapter_number, lang)

    %{
      number: chapter_number,
      title: Floki.text(chapter_link),
      url: href
      # verses: verses
    }
  end

  # Pulls the digits following "pathigam_no=8." out of the href.
  defp extract_chapter_number(href) do
    Regex.run(~r/pathigam_no=8\.(\d+)/, href)
    |> List.last()
    |> String.to_integer()
  end

  @doc """
  Fetches the page of chapter `chapter_number` in `lang` and returns its
  verse texts as a flat list of strings.
  """
  def extract_verses(chapter_number, lang) when is_number(chapter_number) do
    chapter_url =
      "thirumurai_song.php?&pathigam_no=8.#{chapter_number}&lang=" <> lang

    extract_verses(chapter_url)
  end

  @doc """
  Same as `extract_verses/2`, for a chapter URL relative to
  `https://www.sivaya.org/`.
  """
  def extract_verses(chapter_url) when is_binary(chapter_url) do
    {:ok, chapter_html} =
      Finch.build(:get, "https://www.sivaya.org/" <> chapter_url)
      |> Finch.request!(Vyasa.Finch)
      |> Map.get(:body)
      |> Floki.parse_document()

    chapter_html
    |> Floki.find("table td")
    |> extract_verse_texts()
  end

  # Keeps only attribute-less cells shaped [anchor-with-id, verse_node, _],
  # then splits each verse node's text on blank lines, dropping empty pieces.
  defp extract_verse_texts(cells) do
    for {"td", [], [{"a", [{"id", _}], []}, verse_node, _]} <- cells,
        text <- verse_node |> Floki.text() |> String.split("\n\n", trim: true) do
      text
    end
  end
end
# Verse texts of chapter 101 (URL suffix "8.101") in English.
e_v = ThiruvaasagamScraper.extract_verses(101, "english")

# The same chapter in Tamil.
t_v = ThiruvaasagamScraper.extract_verses(101, "tamil")
# Disabled draft: pair the Tamil verses with translations inside one Source.
# %Vyasa.Written.Source{
#     title: "Thiruvasagam",
#     verses: (for i <- 0..length(t_v) do
#     %Vyasa.Written.Verse{no: i, body: Enum.at(t_v, i), translations: (for i <- 0..length(e_v) do
#     %Vyasa.Written.Translation{lang: "en", body: Enum.at(t_v, i)}
#     end)}
#     end)

#   }

# %Vyasa.Written.Source{}
#   |> Vyasa.Written.Source.gen_changeset(%{title: "Thiruvasagam", chapters: })
# Chapter index (%{chapters: [...]}) for each language.
chapters_e = ThiruvaasagamScraper.scrape("english")
chapters_t = ThiruvaasagamScraper.scrape("tamil")
# Insert the source row and keep its id for the chapters and verses below.
%{id: sid} =
  source =
  %Vyasa.Written.Source{}
  |> Vyasa.Written.Source.gen_changeset(%{title: "thiruvasagam", lang: "ta"})
  |> Vyasa.Repo.insert!()

# Build one attribute map per Tamil chapter link (newest first, because the
# fold prepends), then attach the scraped verses to each of them.
c_t =
  chapters_t[:chapters]
  |> Enum.reduce(
    [],
    fn
      # Merge clause: adds this link's URL to the previous entry.
      # NOTE(review): the entries built by the clause below carry :no, not
      # :number, so this pattern does not match them - confirm whether the
      # merge is still wanted.
      %{number: no, title: title, url: [url]}, [%{number: curr_no, url: curr_url} = curr | acc]
      when no == curr_no + 100 ->
        [Map.put(curr, :url, [url | curr_url]) | acc]

      # New chapter entry: the number is shifted down by 100 and the title is
      # reduced to the part before the first "-", after the last triple space.
      %{number: no, title: title, url: url}, acc ->
        [
          %{
            no: no - 100,
            title:
              String.split(title, "-")
              |> List.first()
              |> String.split("   ")
              |> List.last()
              |> String.trim(),
            url: url,
            source_id: sid
          }
          | acc
        ]
    end
  )
  |> Enum.map(fn %{url: url} = c ->
    Map.put(
      c,
      :verses,
      url
      # One HTTP request per URL of the chapter.
      |> Enum.map(fn u ->
        ThiruvaasagamScraper.extract_verses(u)
      end)
      |> List.flatten()
      # Number the verses from 1; prepending leaves the list in descending order.
      |> Enum.reduce({1, []}, fn x, {count, verses} ->
        {count + 1, [%{no: count, body: x, source_id: sid} | verses]}
      end)
      |> elem(1)
    )
  end)
# Insert one chapter per distinct chapter number (the first occurrence wins).
chapters =
  c_t
  |> Enum.uniq_by(& &1.no)
  |> Enum.map(fn c ->
    %Vyasa.Written.Chapter{source_id: sid}
    |> Vyasa.Written.Chapter.changeset(c)
    |> Vyasa.Repo.insert!()
  end)

# Ad-hoc checks: a source looked up by a hard-coded id, and single chapters
# fetched by relative URL and by number.
Vyasa.Written.get_source!("219f1767-6018-45af-9567-0a3fe52671af")
ThiruvaasagamScraper.extract_verses("thirumurai_song.php?&pathigam_no=8.101&lang=tamil")
t_v = ThiruvaasagamScraper.extract_verses(101, "hindi")

GUPOPE

link = "https://www.projectmadurai.org/pm_etexts/utf8/pmuni0094.html"

# Fetch the page, parse it and keep only the <body> node(s); the parsed tree
# is printed in full for inspection.
p =
  Finch.build(:get, link)
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> tap(&IO.inspect(&1, limit: :infinity))

# [{"body", [], body , something}] =p
b = p |> List.first()

# Asserts that the body carries no attributes and binds its children.
{"body", [], body} = b

# Walk the <body> children with a {chapter_index, chapters, window} state:
# every <h3> opens a new chapter and resets the window; bare text lines are
# pushed onto the window as {verse_number | nil, text}, newest first.
ts =
  body
  |> Enum.reduce({-1, %{}, []}, fn
    # Clear window and start new chapter
    {"h3", _, chapter}, {curr, acc, win} when is_list(chapter) ->
      IO.inspect(win, label: "remaining window")
      {curr + 1, Map.put(acc, curr + 1, %{chapter: List.last(chapter), verse: []}), []}

    {"h3", _, chapter}, {curr, acc, win} ->
      IO.inspect(win, label: "remaining window")
      {curr + 1, Map.put(acc, curr + 1, %{chapter: chapter}), []}

    # New value in topic, add appropriate topic_map to topic_maps in accumulator
    {"center", [], [{"h3", [], _list}]}, acc ->
      acc

    {_, _, _}, acc ->
      # other translations not today
      acc

    line, {curr, acc, win} when is_binary(line) ->
      newin =
        case Regex.run(~r/\s*(\(?\d+\)?)\s*$/, line) do
          [_, verse_number] ->
            verse_number =
              verse_number
              |> String.replace(["(", ")"], "")
              |> String.trim()

            # Strip only the trailing verse marker; the pattern is anchored so
            # digits elsewhere in the line are left untouched.
            f_l =
              line
              |> String.replace(~r/\s*\(?\d+\)?\s*$/, "")
              |> String.trim()

            # Allocate the verse to the corresponding number
            [{String.to_integer(verse_number), f_l} | win]

          _ ->
            # If no verse number, just accumulate the line
            [{nil, line} | win]
        end

      {curr, acc, newin}

    # Anything that is neither an element nor text (e.g. a comment node) is skipped.
    _other, acc ->
      acc
  end)
{last, map, win} = ts

# Fold the window (oldest line first) into {last_number, verses}: numbered
# lines go under their own number, unnumbered lines under the next number
# after the last one seen. Values are always lists, so repeated numbers
# prepend cleanly instead of building improper lists.
win
|> Enum.reverse()
|> Enum.reduce({0, %{}}, fn
  {verse_number, text}, {_curr, acc} when is_number(verse_number) ->
    {verse_number, Map.update(acc, verse_number, [text], &[text | &1])}

  {nil, text}, {curr, acc} ->
    {curr + 1, Map.update(acc, curr + 1, [text], &[text | &1])}
end)

# |> Enum.map(fn {verse_number, texts} ->
#       {verse_number, Enum.reverse(texts)}
#     end)

Thiruvaasagam

url = "https://www.sivaya.org/"
# fetch from root frame
path = "thiruvaasagam_complete.php"
doc = "?lang=english"

# Fetch the English index page and keep its <body>; the parsed tree is
# printed in full for inspection.
col =
  Finch.build(:get, url <> path <> doc)
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> tap(&IO.inspect(&1, limit: :infinity))
defmodule ThiruvaasagamScraper do
  @moduledoc """
  Scrapes the Thiruvaasagam index and verse pages from sivaya.org.
  """

  @base_url "https://www.sivaya.org/thiruvaasagam_complete.php"

  @doc """
  Fetches the index page for `lang` and returns `%{chapters: chapters}`.

  Each chapter is a map with `:number`, `:title` and `:url`, where `:url` is
  the list returned by `Floki.attribute/2` for the link's `href`.
  """
  def scrape(lang) do
    {:ok, html} =
      Finch.build(:get, @base_url <> "?lang=" <> lang)
      |> Finch.request!(Vyasa.Finch)
      |> Map.get(:body)
      |> Floki.parse_document()

    %{chapters: extract_chapters(html, lang)}
  end

  @doc """
  Builds one chapter map for every link whose `href` starts with
  `thirumurai_song.php?&pathigam_no=8.`.
  """
  def extract_chapters(html, lang) do
    html
    |> Floki.find("a[href^='thirumurai_song.php?&pathigam_no=8.']")
    |> Enum.map(&extract_chapter(&1, lang))
  end

  # `lang` is only needed by the (currently disabled) verse extraction.
  defp extract_chapter(chapter_link, _lang) do
    href = Floki.attribute(chapter_link, "href")

    chapter_number =
      href
      |> List.first()
      |> extract_chapter_number()

    # verses = extract_verses(chapter_number, lang)

    %{
      number: chapter_number,
      title: Floki.text(chapter_link),
      url: href
      # verses: verses
    }
  end

  # Pulls the digits following "pathigam_no=8." out of the href.
  defp extract_chapter_number(href) do
    Regex.run(~r/pathigam_no=8\.(\d+)/, href)
    |> List.last()
    |> String.to_integer()
  end

  @doc """
  Fetches a chapter page and returns its verse texts as a flat list of strings.

  The first argument is either a chapter number (the absolute URL is then
  built from it and `lang`) or an absolute chapter URL, in which case `lang`
  is not used.
  """
  def extract_verses(chapter_number, lang) when is_number(chapter_number) do
    chapter_url =
      "https://www.sivaya.org/thirumurai_song.php?&pathigam_no=8.#{chapter_number}&lang=" <> lang

    extract_verses(chapter_url, lang)
  end

  def extract_verses(chapter_url, _lang) when is_binary(chapter_url) do
    {:ok, chapter_html} =
      Finch.build(:get, chapter_url)
      |> Finch.request!(Vyasa.Finch)
      |> Map.get(:body)
      |> Floki.parse_document()

    chapter_html
    |> Floki.find("table td")
    |> extract_verse_texts()
  end

  # Keeps only attribute-less cells shaped [anchor-with-id, verse_node, _],
  # then splits each verse node's text on blank lines, dropping empty pieces.
  defp extract_verse_texts(cells) do
    for {"td", [], [{"a", [{"id", _}], []}, verse_node, _]} <- cells,
        text <- verse_node |> Floki.text() |> String.split("\n\n", trim: true) do
      text
    end
  end
end
# Verse texts of chapter 101 (URL suffix "8.101") in English.
e_v = ThiruvaasagamScraper.extract_verses(101, "english")

# The same chapter in Tamil.
t_v = ThiruvaasagamScraper.extract_verses(101, "tamil")
# Disabled draft: pair the Tamil verses with translations inside one Source.
# %Vyasa.Written.Source{
#     title: "Thiruvasagam",
#     verses: (for i <- 0..length(t_v) do
#     %Vyasa.Written.Verse{no: i, body: Enum.at(t_v, i), translations: (for i <- 0..length(e_v) do
#     %Vyasa.Written.Translation{lang: "en", body: Enum.at(t_v, i)}
#     end)}
#     end)

#   }

# %Vyasa.Written.Source{}
#   |> Vyasa.Written.Source.gen_changeset(%{title: "Thiruvasagam", chapters: })
# Chapter index (%{chapters: [...]}) for each language.
chapters_e = ThiruvaasagamScraper.scrape("english")
chapters_t = ThiruvaasagamScraper.scrape("tamil")
# Collapse the English chapter links into one entry per chapter (newest
# first) and pick chapter 1 for inspection.
chapters_e[:chapters]
|> Enum.reduce(
  [],
  fn
    # Same chapter as the previous entry (its stored number is shifted down
    # by 100): add this link's URL to that entry.
    %{number: no, title: _title, url: [url]}, [%{number: curr_no, url: curr_url} = curr | acc]
    when no == curr_no + 100 ->
      [Map.put(curr, :url, [url | curr_url]) | acc]

    # New chapter entry: the number is shifted down by 100 and the title is
    # reduced to the part before the first "-", after the last triple space.
    %{number: no, title: title, url: url}, acc ->
      [
        %{
          number: no - 100,
          title:
            String.split(title, "-")
            |> List.first()
            |> String.split("   ")
            |> List.last()
            |> String.trim(),
          url: url
        }
        | acc
      ]
  end
)
|> Enum.find(&(&1.number == 1))

chapters_e
t_v = ThiruvaasagamScraper.extract_verses(101, "hindi")

Valmiki Ramayana

url = "https://www.valmikiramayan.net/utf8/"
# fetch from root frame
path = "baala/sarga1/"
doc = "balasans1.htm"

# Children of the first <body> node of the sarga page.
col =
  :get
  |> Finch.build(Enum.join([url, path, doc]))
  |> Finch.request!(Vyasa.Finch)
  |> then(& &1.body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> Enum.at(0)
  |> elem(2)
defmodule Rama do
  @moduledoc """
  Reducer that folds the paragraph nodes of a valmikiramayan.net sarga page
  into a list of verse maps, newest first.
  """

  @doc """
  Folds one parsed HTML node into the verse list `acc`.

    * a `SanSloka` paragraph starting with `<audio>` opens a new verse map
      with `"count"` and `"audio"`;
    * any other `SanSloka` paragraph stores its trimmed text under `"verse"`
      in the current map;
    * a `verloc` paragraph that starts with a nested `SanSloka` is folded
      child by child; other `verloc` paragraphs are ignored;
    * any other classed paragraph stores its trimmed text under its class name;
    * everything else leaves `acc` untouched.
  """
  # audio n+1
  # NOTE(review): the new entry starts as a copy of the previous verse map, so
  # keys that the new verse never sets are inherited - confirm this is intended.
  def parse(
        {"p", [{"class", "SanSloka"}], [{"audio", _, _} | _] = audio},
        [%{"count" => c} = curr | _] = acc
      ) do
    [curr |> Map.put("count", c + 1) |> Map.put("audio", audio_src(audio)) | acc]
  end

  # first audio on the page
  def parse({"p", [{"class", "SanSloka"}], [{"audio", _, _} | _] = audio}, []) do
    [%{"count" => 1, "audio" => audio_src(audio)}]
  end

  # verse n + 1
  def parse({"p", [{"class", "SanSloka"}], verses}, [curr | acc]) do
    [Map.put(curr, "verse", verses |> Floki.text() |> String.trim()) | acc]
  end

  # nesting in verloc
  def parse(
        {"p", [{"class", "verloc"}], [{"p", [{"class", "SanSloka"}], _sloka} | _] = ns_tree},
        [curr | _] = acc
      )
      when is_map(curr) do
    Enum.reduce(ns_tree, acc, &parse/2)
  end

  def parse({"p", [{"class", "verloc"}], _rest}, [curr | _] = acc) when is_map(curr), do: acc

  # n case before verse break
  def parse({"p", [{"class", class}], _} = c_tree, [curr | acc]) when is_map(curr) do
    [Map.put(curr, class, c_tree |> Floki.text() |> String.trim()) | acc]
  end

  def parse(_node, acc), do: acc

  # Exactly one <source src=...> is expected below the audio node.
  defp audio_src(audio) do
    [src] =
      audio
      |> Floki.find("source")
      |> Floki.attribute("src")

    src
  end
end

# Verse maps for the page, newest first.
output =
  col
  |> Enum.reduce([], &Rama.parse/2)

# # formatting & tying loose ends
# clean_verses =
#   [Map.put(curr, "count", count + 1) | verses]
#   |> Enum.reverse()
File.mkdir_p!(Path.expand(path, "media"))

# Only the verse whose "count" is 12 is processed here: its audio is
# downloaded to media/<path>/12.mp3, parsed for its duration and wrapped in
# an event.
output
|> Enum.reduce([], fn
  %{"audio" => aud, "count" => 12 = count, "verse" => verse}, acc ->
    aud_bin =
      Finch.build(
        :get,
        Path.join(url <> path, aud)
        |> String.replace(~r/\.\//, "")
      )
      |> Finch.request!(Vyasa.Finch)
      |> Map.get(:body)

    m_path = Path.expand(path <> "/#{count}.mp3", "media")
    File.write!(m_path, aud_bin)

    {:ok,
     %Vyasa.Parser.MP3{
       duration: d,
       path: _p,
       title: _title
     }} = Vyasa.Parser.MP3.parse(m_path)

    [
      %Vyasa.Medium.Event{
        origin: 0,
        duration: d,
        fragments: [%{status: "firstpass", quote: verse}]
      }
      | acc
    ]

  _, acc ->
    acc
end)
# Audio source of the verse whose "count" is 1.
aud =
  output
  |> Enum.find(&(Map.get(&1, "count") == 1))
  |> Map.get("audio")

aud_bin =
  Finch.build(
    :get,
    Path.join(url <> path, aud)
    |> String.replace(~r/\.\//, "")
  )
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)

# # IO.binwrite(path, aud)
# # |> :file.read_file_info()
# File.open(Path.expand([path,aud], "media"), [:write, :binary])
# # |> IO.binwrite(aud_bin)
# Path.expand([path,aud], "media")
# # |> File.touch!()
File.mkdir_p!(Path.expand(path, "media"))

# Write into the directory created above, not onto the directory path itself.
# File.write!/2 returns :ok, so `file` now holds the target path.
file = Path.expand(Path.join(path, Path.basename(aud)), "media")
File.write!(file, aud_bin)

Shlokam

url = "https://shlokam.org/"
path = "hanumanchalisa"

# Every ".uncode_text_column" node of the page.
col =
  :get
  |> Finch.build(Enum.join([url, path]))
  |> Finch.request!(Vyasa.Finch)
  |> then(& &1.body)
  |> Floki.parse_document!()
  |> Floki.find(".uncode_text_column")
# class_key = %{
#   "verse_meaning" => "en_translation",
# "verse_sanskrit" => "text",
# "verse_trans" => "transliteration",
# }
# Fold the text columns into %{title, description, verses}: the column that
# starts with a "Description" <h3> supplies the description, a column holding
# a single <h3> supplies the title, and the column that starts with a
# "verse_sanskrit" div supplies the verses.
output =
  col
  |> Enum.reduce(%{title: nil, description: nil, verses: []}, fn
    {"div", _, [{"h3", [], ["Description"]} | para]}, acc ->
      # IO.inspect(rem, label: "div")
      desc =
        para
        |> Floki.text()

      %{acc | description: desc}

    {"div", _, [{"h3", _, _} = h3_tree]}, acc ->
      title =
        h3_tree
        |> Floki.text()

      %{acc | title: title}

    {"div", _, [{"div", [{"class", "verse_sanskrit"}], _verse} | _] = verse_tree}, acc ->
      # The inner fold builds one map per verse, newest first, keyed by the
      # class of each div. This match needs at least two verse maps, the
      # second of which already carries a "count".
      [curr | [%{"count" => count} | _] = verses] =
        Enum.reduce(verse_tree, [], fn
          # n case verse break
          {"hr", [{"class", "verse_separator"}], []}, [curr | [%{"count" => c} | _] = acc] ->
            [Map.put(curr, "count", c + 1) | acc]

          # init verse break
          {"hr", [{"class", "verse_separator"}], []}, [curr | acc] ->
            [Map.put(curr, "count", 1) | acc]

          # n case after verse break
          {"div", [{"class", class}], _} = c_tree, [%{"count" => _} | _] = acc ->
            [%{class => c_tree |> Floki.text()} | acc]

          # n case before verse break
          {"div", [{"class", class}], _} = c_tree, [curr | acc] when is_map(curr) ->
            [Map.put(curr, class, c_tree |> Floki.text()) | acc]

          # init
          {"div", [{"class", class}], _} = c_tree, [] ->
            [%{class => c_tree |> Floki.text()}]

          # Unrecognised nodes are printed and skipped.
          others, acc ->
            IO.inspect(others)
            acc
        end)

      # formatting & tying loose ends
      # The newest verse map has no "count" yet at this point; number it after
      # its predecessor and restore document order.
      clean_verses =
        [Map.put(curr, "count", count + 1) | verses]
        |> Enum.reverse()

      %{acc | verses: clean_verses}

    _, acc ->
      acc
  end)
# Serialise the scraped result and write it next to the project scripts.
contents = Poison.encode!(output)
# File.write!("chalisa_scraped.json", contents)
filename = "chalisa_scraped.json"
# NOTE: despite its name, `url` holds a relative file path here; it is
# resolved against the current working directory.
url = "./Projects/vyasa/scripts/#{filename}"
File.write!(url, contents)
IO.puts(url)
# NOTE(review): `html` is not bound anywhere in the visible code - this looks
# like scratch code; confirm before running.
Floki.traverse_and_update(html, fn
  {"div", _, _} = node -> {"div", [], ["Modified Div"]}
  node -> node
end)

# NOTE(review): find_element/2 and inner_text/1 are neither defined nor
# imported in the visible code - presumably from a browser-automation
# library; verify.
text = find_element(:class, "verse_sanskrit") |> inner_text()
transliteration = find_element(:class, "verse_trans") |> inner_text()
en_translation = find_element(:class, "verse_meaning") |> inner_text()

map = %{
  verse_number: count,
  text: text,
  transliteration: transliteration,
  en_translation: en_translation
}

Gita Events

gita = Vyasa.Written.get_source_by_title("Gita")
verses = Vyasa.Written.get_verses_in_chapter(1, gita.id)
# verse number => verse id, for the verses of chapter 1
verse_lookup = for %{id: id, no: verse_no} <- verses, into: %{}, do: {verse_no, id}

c1_path = Path.expand("./1.mp3", "media/gita")

# Total duration of the chapter recording, read from the MP3 file itself.
{:ok, %Vyasa.Parser.MP3{duration: tot_d, path: p}} = Vyasa.Parser.MP3.parse(c1_path)

# Register the recording as a voice of the Gita source.
{:ok, voice} =
  %{lang: "sa", duration: tot_d, file_path: c1_path, source_id: gita.id}
  |> Vyasa.Medium.create_voice()

# Vyasa.Medium.get_voice!("4c73fb6d-4163-4b64-90d0-5d49680c1ee4")
# |> Vyasa.Medium.delete_voice()
# Hand-written "label :- mm:ss" cue sheet for the chapter recording. Each line
# becomes [time, label_or_verse_id] (the inner fold reverses the two fields),
# and the list is then folded into events whose durations are derived from
# the start time of the following cue.
"""
start :- 00:00
Shloka 1:-    00:33
Shloka 2 :-   00:49
Shloka 3:-    01:06
Shloka 4:-   01:19
Shloka 5:-   01:32 
Shloka 6:-   01:46
Shloka 7:-   02:00
Shloka 8:-   02:15
Shloka 9:-   02:28
Shloka 10:-  02:42
Shloka 11:-  02:56
Shloka 12:- 03:09
Shloka 13:- 03:22
Shloka 14:- 03:36
Shloka 15:- 03:49
Shloka 16:- 04:02
Shloka 17:- 04:14
Shloka 18:- 04:27
Shloka 19:- 04:40
Shloka 20:- 04:54
Shloka 21:-  05:07
Shloka 22:-  05:23
Shloka 23:-  05:36
Shloka 24:-  05:50
Shloka 25:- 06:05
Shloka 26:- 06:18
Shloka 27:- 06:32
Shloka 28:- 06:46
Shloka 29:-  07:01
Shloka 30:-  07:13
Shloka 31:-  07:26
Shloka 32 :-  07:38
Shloka 33:-   07:52
Shloka 34 :-  08:05
Shloka 35 :-  08:18
Shloka 36 :- 08:31
Shloka 37:-  08:44
Shloka 38 :- 08:57
Shloka 39:-  09:09
Shloka 40:-  09:22
Shloka 41:- 09:35
Shloka 42:- 09:48
Shloka 43:-  10:02
Shloka 44:-  10:16
Shloka 45:-  10:29
Shloka 46:-  10:40
Shloka 47:- 10:53
end:-  11:08
"""
|> String.split("\n")
|> Enum.map(fn x ->
  x
  |> String.split(":-")
  |> Enum.map(&String.trim/1)
  |> Enum.reduce([], fn
    # "Shloka <n>" is replaced by the id of verse n.
    <<"Shloka"::utf8, _sep::utf8, verse_no::binary>>, acc ->
      [verse_lookup[String.to_integer(verse_no)] | acc]

    bin, acc ->
      [bin | acc]
  end)
end)
|> IO.inspect(limit: :infinity)
|> Enum.reduce(
  [],
  fn
    [_time, "start"], acc ->
      [%Vyasa.Medium.Event{origin: 0, phase: "start", voice_id: voice.id} | acc]

    # The closing cue runs until the end of the recording and also fixes the
    # duration of the event before it.
    [time, "end"], [%{origin: o} = prev | acc] ->
      [min, sec] = time |> String.split(":") |> Enum.map(&String.to_integer/1)
      d = (min * 60 + sec) * 1000

      [
        %Vyasa.Medium.Event{origin: d, duration: tot_d - d, phase: "end", voice_id: voice.id}
        | [%{prev | duration: d - o} | acc]
      ]

    # A verse cue starts a new event and fixes the duration of the previous one.
    [time, id], [%{origin: o} = prev | acc] ->
      [min, sec] = time |> String.split(":") |> Enum.map(&String.to_integer/1)
      d = (min * 60 + sec) * 1000

      [
        %Vyasa.Medium.Event{origin: d, verse_id: id, voice_id: voice.id}
        | [%{prev | duration: d - o} | acc]
      ]

    # Blank lines of the cue sheet.
    _, acc ->
      acc
  end
)
|> Enum.map(&Vyasa.Medium.create_event(&1))