Scraper Lite
# Mix.install([
# {:poison, "~> 5.0"},
# {:floki, "~> 0.35.2"},
# {:req, "~> 0.4.8"},
# {:youtube_captions, "~> 0.1.0"}
# ])
Root
# defmodule R do
# def recompile() do
# Mix.Task.reenable("app.start")
# Mix.Task.reenable("compile")
# Mix.Task.reenable("compile.all")
# compilers = Mix.compilers()
# Enum.each(compilers, &Mix.Task.reenable("compile.#{&1}"))
# Mix.Task.run("compile.all")
# end
# end
# R.recompile()
# {:ok, src} = Vyasa.Written.create_source(%{title: "rama"})
# Vyasa.Medium.Writer.init(%)
SAAVN
# Album lookup against the saavn.dev API. The raw Finch response is bound to
# `p`; its body is decoded further down when the download list is built.
link =
  "https://saavn.dev/api/albums?link=https://www.jiosaavn.com/album/thiruvasagam-vol-1-to-6/M2txfVKHMQ0_"

p = Finch.request!(Finch.build(:get, link), Vyasa.Finch)
defmodule Vyasa.Stream do
  @moduledoc """
  Streams remote files to disk through Finch, following HTTP redirects and
  printing progress while the body arrives.
  """

  @redirect_statuses [301, 302, 303, 307, 308]

  @doc """
  Builds a lazy stream that runs `download/2` for every `{url, path}` pair.

  Nothing is fetched until the returned stream is consumed (for example with
  `Stream.run/1`). Tasks are started with `timeout: :infinity`.
  """
  def stream([{_url, _path} | _] = data) do
    Task.async_stream(data, fn {url, path} -> download(url, path) end, timeout: :infinity)
  end

  @doc """
  Downloads `url` into `file_path`.

  Returns `{:halt, {file_path, :done}}` once the file handle has been closed,
  or `{:halt, :error}` when closing fails. The `:halt` tuple is kept on purpose:
  on a redirect `handle_data/4` calls this function again and hands its result
  straight back to `Finch.stream_while/5` as the reducer's return value.
  """
  def download(url, file_path) do
    IO.puts("Starting to process #{inspect(file_path)}...........")

    # Opened in :write mode, so a redirected retry starts again from an empty file.
    file_pid = File.open!(file_path, [:write, :binary])
    if not is_pid(file_pid), do: raise("File creation problem on disk")

    Finch.build(:get, url)
    |> Finch.stream_while(Vyasa.Finch, nil, fn
      # The status is stored in the accumulator so the headers step can branch on it.
      {:status, status}, _acc ->
        {:cont, status}

      # Redirect: the accumulator becomes the "location" header tuple.
      # 200: the accumulator becomes {bytes_written, content_length}.
      {:headers, headers}, acc ->
        handle_headers(headers, acc)

      # Redirect: start over with the new location. Otherwise append the chunk.
      {:data, data}, acc ->
        handle_data(data, acc, file_path, file_pid)

      # Trailers carry nothing this module needs; keep streaming.
      {:trailers, _trailers}, acc ->
        {:cont, acc}
    end)

    case File.close(file_pid) do
      :ok ->
        {:halt, {file_path, :done}}

      {:error, _reason} ->
        {:halt, :error}
    end
  end

  @doc """
  Turns the response headers into the next accumulator, based on the status
  stored by the previous step.

    * redirect status - `{:cont, {"location", url}}`, or
      `{:halt, :missing_location}` when the response carries no `location` header
    * `200` - `{:cont, {0, content_length}}`; a missing `content-length` header
      is treated as size `0`, which disables the percentage output
    * anything else - `{:halt, :bad_status}`
  """
  def handle_headers(headers, status) when status in @redirect_statuses do
    IO.puts("REDIR: #{status}")

    case List.keyfind(headers, "location", 0) do
      nil -> {:halt, :missing_location}
      location -> {:cont, location}
    end
  end

  def handle_headers(headers, 200) do
    size =
      case List.keyfind(headers, "content-length", 0) do
        {"content-length", value} -> String.to_integer(value)
        # Chunked responses have no content-length; keep going without a total.
        nil -> 0
      end

    {:cont, {0, size}}
  end

  def handle_headers(_, status) do
    dbg(status)
    {:halt, :bad_status}
  end

  @doc """
  Handles one body chunk.

  When the accumulator is a `{"location", url}` tuple the current file handle is
  closed and the download restarts against `url`. Otherwise the chunk is
  appended to the file and the running byte count is updated. A write failure
  stops the stream with `{:halt, {:error, reason}}`.
  """
  def handle_data(_data, {"location", location}, file_path, file_pid) do
    if Process.alive?(file_pid), do: :ok = File.close(file_pid)
    download(location, file_path)
  end

  def handle_data(data, {processed, size}, file_path, file_pid) do
    case IO.binwrite(file_pid, data) do
      :ok ->
        processed = processed + byte_size(data)

        # A percentage is only meaningful when the total size is known.
        if is_integer(size) and size > 0 do
          IO.inspect(Float.round(processed * 100 / size, 1),
            label: "Processed #{inspect(file_path)} %: "
          )
        end

        {:cont, {processed, size}}

      {:error, reason} ->
        # The reducer must answer with :cont or :halt; a bare error tuple is rejected.
        {:halt, {:error, reason}}
    end
  end
end
# Destination directory for the album. It is created up front because
# `Vyasa.Stream.download/2` opens every target with `File.open!/2`, which raises
# when the parent directory does not exist.
path = "#{File.cwd!()}/media/thiruvasagam/"
File.mkdir_p!(path)
Application.ensure_all_started(:inets)

data =
  p
  |> Map.get(:body)
  |> Jason.decode!()
  |> Map.get("data")
  |> Map.get("songs")
  # |> List.first()
  |> Enum.map(fn %{"downloadUrl" => urls, "name" => name} ->
    # The last entry of "downloadUrl" is the one that gets downloaded.
    url = urls |> Enum.reverse() |> hd() |> Map.get("url")
    {url, path <> name}
  end)
  # |> Task.async_stream((fn {url, name} ->
  # Vyasa.Stream.download(url, path <> name)
  # end), timeout: :infinity)
  |> Vyasa.Stream.stream()
  |> Stream.run()
GUPOPE
# Fetch the Project Madurai page, parse it and keep the matched <body> nodes.
link = "https://www.projectmadurai.org/pm_etexts/utf8/pmuni0094.html"

p =
  :get
  |> Finch.build(link)
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> tap(fn nodes -> IO.inspect(nodes, limit: :infinity) end)

# [{"body", [], body , something}] =p
# Only the first <body> match is used; its children are bound to `body`.
b = List.first(p)
{"body", [], body} = b
# First pass over the children of <body>. The accumulator is {curr, acc, win}:
# `curr` is the index of the latest chapter heading (starts at -1), `acc` maps
# chapter index => chapter info, and `win` collects {verse_number | nil, text}
# tuples with the newest entry first.
ts =
body
|> Enum.reduce({-1, %{}, []}, fn
# Clear window and start new chapter
{"h3", _, chapter}, {curr, acc, win} when is_list(chapter) ->
IO.inspect(win, label: "remaining window")
{curr + 1, Map.put(acc, curr + 1, %{chapter: List.last(chapter), verse: []}), []}
# Same as above for an <h3> whose children are not a list; no :verse key is stored here.
{"h3", _, chapter}, {curr, acc, win} ->
IO.inspect(win, label: "remaining window")
{curr + 1, Map.put(acc, curr + 1, %{chapter: chapter}), []}
# A <center> wrapping a single <h3> is skipped without touching the accumulator.
{"center", [], [{"h3", [], list}]}, acc ->
acc
{_, _, _}, acc ->
# other translations not today
acc
# Anything that is not a 3-tuple lands here and is passed to Regex.run/2 as a line of text.
line, {curr, acc, win} ->
IO.inspect(line, label: "caught")
newin =
# A number at the end of the line, optionally in parentheses, is taken as the verse number.
case Regex.run(~r/\s*(\(?\d+\)?)\s*$/, line) do
[_, verse_number] ->
verse_number = String.replace(verse_number, "(", "") |> String.replace(")", "")
verse_number = String.trim(verse_number)
# NOTE: this pattern has no `$` anchor, so every run of digits in the line is
# removed, not only the trailing verse number.
f_l =
line
|> String.replace(~r/\s*\(?\d+\)?\s*/, "")
|> String.trim()
# Allocate the verse to the corresponding number
[{String.to_integer(verse_number), f_l} | win]
_ ->
# If no verse number, just accumulate the line
[{nil, line} | win]
end
{curr, acc, newin}
end)
# Second pass: walk the collected lines in document order (with an :END marker
# appended) and assign them to verse numbers.
#
# Accumulator: {curr, rem, acc}
#   curr - last verse number seen
#   rem  - unnumbered lines waiting for a verse number, newest first
#   acc  - map of verse number => text (further lines are consed onto it)
{last, map, win} = ts

{count, _, v} =
  [:END | win]
  |> Enum.reverse()
  |> Enum.reduce({0, [], %{}}, fn
    # A numbered line closes a verse. The pending lines are handed to the verse
    # numbers between `curr` and `verse_number`, counting down from `verse_number`.
    {verse_number, text}, {curr, rem, acc} when is_number(verse_number) ->
      {_, verses} =
        rem
        |> Enum.reverse()
        |> Enum.reduce_while({verse_number, acc}, fn
          x, {count, acc} when count > curr ->
            # Lines made only of capitals, whitespace and punctuation are skipped.
            case Regex.match?(~r/^[A-Z\s'!@#$%^&*()_+={}\[\]:;"'<>,.?\/\\-]+$/, x) do
              false ->
                {:cont,
                 {count - 1, Map.update(acc, count - 1, x, fn existing -> [x | existing] end)}}

              _ ->
                {:cont, {count, acc}}
            end

          # Stop once the counter reaches the previous verse. The halt value keeps
          # the {count, acc} shape because the caller destructures a 2-tuple.
          _x, {count, acc} ->
            {:halt, {count, acc}}
        end)

      {verse_number, [],
       Map.update(verses, verse_number, text, fn existing -> [text | existing] end)}

    # Sliding window: once more than four lines are pending, the oldest is dropped.
    {nil, text}, {curr, rem, acc} when length(rem) > 4 ->
      {curr, [text | rem |> Enum.reverse() |> tl() |> Enum.reverse()], acc}

    {nil, text}, {curr, rem, acc} ->
      {curr, [text | rem], acc}

    # End of input: the remaining lines get consecutive numbers after `curr`.
    :END, {curr, rem, acc} ->
      {_, terminal} =
        rem
        |> Enum.reverse()
        |> Enum.reduce({curr, acc}, fn x, {count, acc} ->
          {count + 1, Map.update(acc, count + 1, x, fn existing -> [x | existing] end)}
        end)

      {curr, [], terminal}
  end)

v
# |> Enum.map(fn {verse_number, texts} ->
# {verse_number, Enum.reverse(texts)}
# end)
Thiruvaasagam
# Pieces of the sivaya.org root-frame URL for the complete Thiruvaasagam listing.
url = "https://www.sivaya.org/"
path = "thiruvaasagam_complete.php"
doc = "?lang=english"

# Fetch the page, parse it and keep the matched <body> nodes (printed in full).
col =
  :get
  |> Finch.build("#{url}#{path}#{doc}")
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> tap(fn nodes -> IO.inspect(nodes, limit: :infinity) end)
defmodule ThiruvaasagamScraper do
  @moduledoc """
  Scrapes the Thiruvaasagam chapter index and chapter pages from sivaya.org.
  """

  @base_url "https://www.sivaya.org/thiruvaasagam_complete.php"

  @doc """
  Fetches the index page for `lang` and returns `%{chapters: [...]}`, one map
  per chapter link found on the page.
  """
  def scrape(lang) do
    response = Finch.request!(Finch.build(:get, @base_url <> "?lang=" <> lang), Vyasa.Finch)
    {:ok, html} = Floki.parse_document(response.body)

    %{chapters: extract_chapters(html, lang)}
  end

  @doc """
  Collects every anchor whose `href` starts with the chapter-page prefix and
  converts each one into a chapter map.
  """
  def extract_chapters(html, lang) do
    for link <- Floki.find(html, "a[href^='thirumurai_song.php?&pathigam_no=8.']") do
      extract_chapter(link, lang)
    end
  end

  # Builds %{number:, title:, url:} from one anchor. `url` is the list returned by
  # Floki.attribute/2, not a bare string.
  defp extract_chapter(chapter_link, _lang) do
    href = Floki.attribute(chapter_link, "href")
    number = href |> List.first() |> extract_chapter_number()
    title = Floki.text(chapter_link)

    # verses = extract_verses(chapter_number, lang)
    %{number: number, title: title, url: href}
  end

  # Pulls the digits that follow "pathigam_no=8." out of the link target.
  defp extract_chapter_number(href) do
    ~r/pathigam_no=8\.(\d+)/
    |> Regex.run(href)
    |> List.last()
    |> String.to_integer()
  end

  @doc """
  Returns the verse texts of a chapter, addressed by chapter number and language.
  """
  def extract_verses(chapter_number, lang) when is_number(chapter_number) do
    extract_verses("thirumurai_song.php?&pathigam_no=8.#{chapter_number}&lang=" <> lang)
  end

  @doc """
  Returns the verse texts of a chapter, addressed by its site-relative URL.
  """
  def extract_verses(chapter_url) when is_binary(chapter_url) do
    response =
      Finch.request!(Finch.build(:get, "https://www.sivaya.org/" <> chapter_url), Vyasa.Finch)

    {:ok, chapter_html} = Floki.parse_document(response.body)

    chapter_html
    |> Floki.find("table td")
    |> extract_verse_texts()
  end

  # Keeps the middle child of every attribute-less <td> that starts with an empty
  # <a id=...> and has exactly three children, then splits its text on blank lines.
  defp extract_verse_texts(cells) do
    cells
    |> Enum.flat_map(fn
      {"td", [], [{"a", [{"id", _}], []}, content, _]} -> [content]
      _ -> []
    end)
    |> Enum.flat_map(fn content ->
      content
      |> Floki.text()
      |> String.split("\n\n")
      |> Enum.reject(&(&1 == ""))
    end)
  end
end
# Sample verse pulls for chapter number 101 in English and Tamil.
e_v = ThiruvaasagamScraper.extract_verses(101, "english")
t_v = ThiruvaasagamScraper.extract_verses(101, "tamil")
# %Vyasa.Written.Source{
# title: "Thiruvasagam",
# verses: (for i <- 0..length(t_v) do
# %Vyasa.Written.Verse{no: i, body: Enum.at(t_v, i), translations: (for i <- 0..length(e_v) do
# %Vyasa.Written.Translation{lang: "en", body: Enum.at(t_v, i)}
# end)}
# end)
# }
# %Vyasa.Written.Source{}
# |> Vyasa.Written.Source.gen_changeset(%{title: "Thiruvasagam", chapters: })
# Chapter indexes for both languages.
chapters_e = ThiruvaasagamScraper.scrape("english")
chapters_t = ThiruvaasagamScraper.scrape("tamil")
# Insert the source row and bind its id to `sid` (the whole record stays in `source`).
%{id: sid} =
source =
%Vyasa.Written.Source{}
|> Vyasa.Written.Source.gen_changeset(%{title: "thiruvasagam", lang: "ta"})
|> Vyasa.Repo.insert!()
# Build one attribute map per Tamil chapter link and attach its scraped verses.
c_t =
chapters_t[:chapters]
|> Enum.reduce(
[],
fn
# NOTE(review): the maps pushed onto the accumulator below use the key :no, not
# :number, so the %{number: curr_no} pattern in this clause cannot match them.
# Confirm whether merging the URLs of a repeated chapter was intended.
%{number: no, title: title, url: [url]}, [%{number: curr_no, url: curr_url} = curr | acc]
when no == curr_no + 100 ->
[Map.put(curr, :url, [url | curr_url]) | acc]
# Default: the scraped number is stored minus 100. The title keeps the text before
# the first "-", then its last space-separated word, trimmed.
%{number: no, title: title, url: url}, acc ->
[
%{
no: no - 100,
title:
String.split(title, "-")
|> List.first()
|> String.split(" ")
|> List.last()
|> String.trim(),
url: url,
source_id: sid
}
| acc
]
end
)
# Fetch the verses behind every URL of the chapter and number them from 1.
# The verse list is built by prepending, so it ends up highest number first.
|> Enum.map(fn %{url: url} = c ->
Map.put(
c,
:verses,
url
|> Enum.map(fn u ->
ThiruvaasagamScraper.extract_verses(u)
end)
|> List.flatten()
|> Enum.reduce({1, []}, fn x, {count, verses} ->
{count + 1, [%{no: count, body: x, source_id: sid} | verses]}
end)
|> elem(1)
)
end)
# Insert one chapter row per distinct chapter number. When a number occurs more
# than once, the first entry in `c_t` is kept.
chapters =
  c_t
  |> Enum.uniq_by(& &1.no)
  |> Enum.map(fn c ->
    %Vyasa.Written.Chapter{source_id: sid}
    |> Vyasa.Written.Chapter.changeset(c)
    |> Vyasa.Repo.insert!()
  end)
# Ad-hoc checks: load a source by a hard-coded id, fetch chapter 101 through the
# URL form of extract_verses, and rebind `t_v` to the "hindi" verses.
Vyasa.Written.get_source!("219f1767-6018-45af-9567-0a3fe52671af")
ThiruvaasagamScraper.extract_verses("thirumurai_song.php?&pathigam_no=8.101&lang=tamil")
t_v = ThiruvaasagamScraper.extract_verses(101, "hindi")
GUPOPE
# Fetch the Project Madurai page, parse it and keep the matched <body> nodes.
link = "https://www.projectmadurai.org/pm_etexts/utf8/pmuni0094.html"

p =
  :get
  |> Finch.build(link)
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> tap(fn nodes -> IO.inspect(nodes, limit: :infinity) end)

# [{"body", [], body , something}] =p
# Only the first <body> match is used; its children are bound to `body`.
b = List.first(p)
{"body", [], body} = b
# First pass over the children of <body>. The accumulator is {curr, acc, win}:
# `curr` is the index of the latest chapter heading (starts at -1), `acc` maps
# chapter index => chapter info, and `win` collects {verse_number | nil, text}
# tuples with the newest entry first.
ts =
body
|> Enum.reduce({-1, %{}, []}, fn
# Clear window and start new chapter
{"h3", _, chapter}, {curr, acc, win} when is_list(chapter) ->
IO.inspect(win, label: "remaining window")
{curr + 1, Map.put(acc, curr + 1, %{chapter: List.last(chapter), verse: []}), []}
# Same as above for an <h3> whose children are not a list; no :verse key is stored here.
{"h3", _, chapter}, {curr, acc, win} ->
IO.inspect(win, label: "remaining window")
{curr + 1, Map.put(acc, curr + 1, %{chapter: chapter}), []}
# A <center> wrapping a single <h3> is skipped without touching the accumulator.
{"center", [], [{"h3", [], list}]}, acc ->
acc
{_, _, _}, acc ->
# other translations not today
acc
# Anything that is not a 3-tuple lands here and is passed to Regex.run/2 as a line of text.
line, {curr, acc, win} ->
newin =
# A number at the end of the line, optionally in parentheses, is taken as the verse number.
case Regex.run(~r/\s*(\(?\d+\)?)\s*$/, line) do
[_, verse_number] ->
verse_number = String.replace(verse_number, "(", "") |> String.replace(")", "")
verse_number = String.trim(verse_number)
# NOTE: this pattern has no `$` anchor, so every run of digits in the line is
# removed, not only the trailing verse number.
f_l =
line
|> String.replace(~r/\s*\(?\d+\)?\s*/, "")
|> String.trim()
# Allocate the verse to the corresponding number
[{String.to_integer(verse_number), f_l} | win]
_ ->
# If no verse number, just accumulate the line
[{nil, line} | win]
end
{curr, acc, newin}
end)
# Simpler assignment of lines to verse numbers: walk the window in document order
# with {curr, acc}, where `curr` is the last verse number used.
{last, map, win} = ts
win
|> Enum.reverse()
|> Enum.reduce({0, %{}}, fn
# A numbered line is stored under its own number, which becomes the new `curr`.
{verse_number, text}, {curr, acc} when is_number(verse_number) ->
{verse_number, Map.update(acc, verse_number, text, fn existing -> [text | existing] end)}
# An unnumbered line is stored under curr + 1 and advances `curr` by one.
{nil, text}, {curr, acc} ->
{curr + 1, Map.update(acc, curr + 1, text, fn existing -> [text | existing] end)}
end)
# |> Enum.map(fn {verse_number, texts} ->
# {verse_number, Enum.reverse(texts)}
# end)
Thiruvaasagam
# Pieces of the sivaya.org root-frame URL for the complete Thiruvaasagam listing.
url = "https://www.sivaya.org/"
path = "thiruvaasagam_complete.php"
doc = "?lang=english"

# Fetch the page, parse it and keep the matched <body> nodes (printed in full).
col =
  :get
  |> Finch.build("#{url}#{path}#{doc}")
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> tap(fn nodes -> IO.inspect(nodes, limit: :infinity) end)
defmodule ThiruvaasagamScraper do
  @moduledoc """
  Scrapes the Thiruvaasagam chapter index and chapter pages from sivaya.org.
  """

  @base_url "https://www.sivaya.org/thiruvaasagam_complete.php"

  @doc """
  Fetches the index page for `lang` and returns `%{chapters: [...]}`, one map
  per chapter link found on the page.
  """
  def scrape(lang) do
    response = Finch.request!(Finch.build(:get, @base_url <> "?lang=" <> lang), Vyasa.Finch)
    {:ok, html} = Floki.parse_document(response.body)

    %{chapters: extract_chapters(html, lang)}
  end

  @doc """
  Collects every anchor whose `href` starts with the chapter-page prefix and
  converts each one into a chapter map.
  """
  def extract_chapters(html, lang) do
    for link <- Floki.find(html, "a[href^='thirumurai_song.php?&pathigam_no=8.']") do
      extract_chapter(link, lang)
    end
  end

  # Builds %{number:, title:, url:} from one anchor. `url` is the list returned by
  # Floki.attribute/2, not a bare string.
  defp extract_chapter(chapter_link, _lang) do
    href = Floki.attribute(chapter_link, "href")
    number = href |> List.first() |> extract_chapter_number()
    title = Floki.text(chapter_link)

    # verses = extract_verses(chapter_number, lang)
    %{number: number, title: title, url: href}
  end

  # Pulls the digits that follow "pathigam_no=8." out of the link target.
  defp extract_chapter_number(href) do
    ~r/pathigam_no=8\.(\d+)/
    |> Regex.run(href)
    |> List.last()
    |> String.to_integer()
  end

  @doc """
  Returns the verse texts of a chapter.

  The first argument is either a chapter number, which is expanded into the
  absolute chapter URL for `lang`, or an absolute URL that is fetched as given.
  """
  def extract_verses(chapter_number, lang) when is_number(chapter_number) do
    extract_verses(
      "https://www.sivaya.org/thirumurai_song.php?&pathigam_no=8.#{chapter_number}&lang=" <> lang,
      lang
    )
  end

  def extract_verses(chapter_url, lang) when is_binary(chapter_url) do
    response = Finch.request!(Finch.build(:get, chapter_url), Vyasa.Finch)
    {:ok, chapter_html} = Floki.parse_document(response.body)

    chapter_html
    |> Floki.find("table td")
    |> extract_verse_texts()
  end

  # Keeps the middle child of every attribute-less <td> that starts with an empty
  # <a id=...> and has exactly three children, then splits its text on blank lines.
  defp extract_verse_texts(cells) do
    cells
    |> Enum.flat_map(fn
      {"td", [], [{"a", [{"id", _}], []}, content, _]} -> [content]
      _ -> []
    end)
    |> Enum.flat_map(fn content ->
      content
      |> Floki.text()
      |> String.split("\n\n")
      |> Enum.reject(&(&1 == ""))
    end)
  end
end
# Sample verse pulls for chapter number 101 in English and Tamil.
e_v = ThiruvaasagamScraper.extract_verses(101, "english")
t_v = ThiruvaasagamScraper.extract_verses(101, "tamil")
# %Vyasa.Written.Source{
# title: "Thiruvasagam",
# verses: (for i <- 0..length(t_v) do
# %Vyasa.Written.Verse{no: i, body: Enum.at(t_v, i), translations: (for i <- 0..length(e_v) do
# %Vyasa.Written.Translation{lang: "en", body: Enum.at(t_v, i)}
# end)}
# end)
# }
# %Vyasa.Written.Source{}
# |> Vyasa.Written.Source.gen_changeset(%{title: "Thiruvasagam", chapters: })
# Chapter indexes for both languages.
chapters_e = ThiruvaasagamScraper.scrape("english")
chapters_t = ThiruvaasagamScraper.scrape("tamil")
# Normalise the English index and look up the entry stored under number 1.
chapters_e[:chapters]
|> Enum.reduce(
[],
fn
# Accumulated entries store `number` already reduced by 100, so this clause fires
# when the incoming link carries the same scraped number as the entry at the head;
# its URL is then prepended to that entry's URL list.
%{number: no, title: title, url: [url]}, [%{number: curr_no, url: curr_url} = curr | acc]
when no == curr_no + 100 ->
[Map.put(curr, :url, [url | curr_url]) | acc]
# Default: store the number minus 100. The title keeps the text before the first
# "-", then its last space-separated word, trimmed.
%{number: no, title: title, url: url}, acc ->
[
%{
number: no - 100,
title:
String.split(title, "-")
|> List.first()
|> String.split(" ")
|> List.last()
|> String.trim(),
url: url
}
| acc
]
end
)
|> Enum.find(&(&1.number == 1))
# Bare expression: evaluates to the raw English index.
chapters_e
# Rebinds `t_v` to the verses requested with lang "hindi".
t_v = ThiruvaasagamScraper.extract_verses(101, "hindi")
Valmiki Ramayana
# URL pieces for sarga 1 of the Bala Kanda on valmikiramayan.net.
url = "https://www.valmikiramayan.net/utf8/"
path = "baala/sarga1/"
doc = "balasans1.htm"

# Fetch the root frame and bind the children of its first <body> node to `col`.
col =
  :get
  |> Finch.build("#{url}#{path}#{doc}")
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find("body")
  |> List.first()
  |> elem(2)
defmodule Rama do
  @moduledoc """
  Reducer that folds the nodes of a sarga page into a list of verse maps, with
  the most recently opened verse at the head of the list.
  """

  @doc """
  Folds one parsed HTML node into the verse list `acc`.

  Clause order matters: audio paragraphs are tried first, then plain verse
  paragraphs, then `verloc` wrappers, then any other classed paragraph. Nodes
  that match none of these leave `acc` untouched.
  """
  # audio n+1: an audio paragraph opens a new entry that starts as a copy of the
  # current one, with the counter bumped and the audio source replaced
  def parse(
        {"p", [{"class", "SanSloka"}], [{"audio", _, _} | _] = audio},
        [%{"count" => c} = curr | _] = acc
      ) do
    entry = Map.merge(curr, %{"count" => c + 1, "audio" => audio_src(audio)})
    [entry | acc]
  end

  # very first audio paragraph
  def parse({"p", [{"class", "SanSloka"}], [{"audio", _, _} | _] = audio}, []) do
    [%{"count" => 1, "audio" => audio_src(audio)}]
  end

  # verse n + 1: the text of a non-audio sloka paragraph goes into the current entry
  def parse({"p", [{"class", "SanSloka"}], verses}, [curr | rest]) do
    text = verses |> Floki.text() |> String.trim()
    [Map.put(curr, "verse", text) | rest]
  end

  # nesting in verloc: a wrapper whose first child is a sloka paragraph is
  # unfolded and its children are parsed one by one
  def parse(
        {"p", [{"class", "verloc"}], [{"p", [{"class", "SanSloka"}], _sloka} | _] = ns_tree},
        [curr | _] = acc
      )
      when is_map(curr) do
    List.foldl(ns_tree, acc, &parse/2)
  end

  # any other verloc paragraph is ignored
  def parse({"p", [{"class", "verloc"}], _rem}, [curr | _] = acc) when is_map(curr) do
    acc
  end

  # n case before verse break: other classed paragraphs are stored under their
  # class name on the current entry
  def parse({"p", [{"class", class}], _} = c_tree, [curr | rest]) when is_map(curr) do
    text = c_tree |> Floki.text() |> String.trim()
    [Map.put(curr, class, text) | rest]
  end

  def parse(_para, acc), do: acc

  # Extracts the single `src` attribute of the <source> element inside an audio
  # paragraph; anything other than exactly one match raises.
  defp audio_src(audio) do
    [src] =
      audio
      |> Floki.find("source")
      |> Floki.attribute("src")

    src
  end
end
# Fold every node of the page body into the verse list built by Rama.parse/2.
output =
col
|> Enum.reduce([], &Rama.parse/2)
# # formatting & tying loose ends
# clean_verses =
# [Map.put(curr, "count", count + 1) | verses]
# |> Enum.reverse()
# Make sure media/<path> exists before any audio file is written into it.
File.mkdir_p!(Path.expand(path, "media"))
output
|> Enum.reduce([], fn
# Only an entry whose "count" is exactly 12 (and that has audio and a verse)
# matches this clause; every other entry is skipped by the clause at the bottom.
%{"audio" => aud, "count" => 12 = count, "verse" => verse}, acc ->
# Build the audio URL from the page URL and the relative source, dropping "./".
aud_bin =
Finch.build(
:get,
Path.join(url <> path, aud)
|> String.replace(~r/\.\//, "")
)
|> Finch.request!(Vyasa.Finch)
|> Map.get(:body)
# Store the clip as media/<path>/<count>.mp3, then parse it to get its duration.
m_path = Path.expand(path <> "/#{count}.mp3", "media")
File.write!(m_path, aud_bin)
{:ok,
%Vyasa.Parser.MP3{
duration: d,
path: p,
title: title
}} = Vyasa.Parser.MP3.parse(m_path)
# One event per clip, starting at 0 and spanning the parsed duration.
[
%Vyasa.Medium.Event{
origin: 0,
duration: d,
fragments: [%{status: "firstpass", quote: verse}]
}
| acc
]
_, acc ->
acc
end)
# Download the audio clip of the entry whose "count" is 1 and store it inside
# media/<path>/ under the clip's own file name.
aud =
  output
  |> Enum.find(&(Map.get(&1, "count") == 1))
  |> Map.get("audio")

aud_bin =
  Finch.build(
    :get,
    Path.join(url <> path, aud)
    |> String.replace(~r/\.\//, "")
  )
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)

# # IO.binwrite(path, aud)
# # |> :file.read_file_info()
# File.open(Path.expand([path,aud], "media"), [:write, :binary])
# # |> IO.binwrite(aud_bin)
# Path.expand([path,aud], "media")
# # |> File.touch!()
media_dir = Path.expand(path, "media")
File.mkdir_p!(media_dir)

# File.write!/2 returns :ok (it raises on failure), so there is no {:ok, file}
# tuple to match on. The target also has to be a file inside the directory, not
# the directory that was just created.
File.write!(Path.join(media_dir, Path.basename(aud)), aud_bin)
Shlokam
# Fetch the Hanuman Chalisa page from shlokam.org and keep every node with the
# "uncode_text_column" class.
url = "https://shlokam.org/"
path = "hanumanchalisa"

col =
  :get
  |> Finch.build("#{url}#{path}")
  |> Finch.request!(Vyasa.Finch)
  |> Map.get(:body)
  |> Floki.parse_document!()
  |> Floki.find(".uncode_text_column")
# class_key = %{
# "verse_meaning" => "en_translation",
# "verse_sanskrit" => "text",
# "verse_trans" => "transliteration",
# }
# Fold the text columns into %{title, description, verses}.
output =
col
|> Enum.reduce(%{title: nil, description: nil, verses: []}, fn
# A column that opens with an <h3>Description</h3>: the text of the remaining
# children becomes the description.
{"div", _, [{"h3", [], ["Description"]} | para]}, acc ->
# IO.inspect(rem, label: "div")
desc =
para
|> Floki.text()
%{acc | description: desc}
# A column holding a single <h3>: its text becomes the title.
{"div", _, [{"h3", _, _} = h3_tree]}, acc ->
title =
h3_tree
|> Floki.text()
%{acc | title: title}
# A column whose first child is a "verse_sanskrit" div holds the verses.
{"div", _, [{"div", [{"class", "verse_sanskrit"}], _verse} | _] = verse_tree}, acc ->
# The inner fold builds a newest-first list of maps keyed by CSS class name.
# The match below needs at least two entries, the second of which already
# carries a "count"; anything else raises a MatchError.
[curr | [%{"count" => count} | _] = verses] =
Enum.reduce(verse_tree, [], fn
# n case verse break
{"hr", [{"class", "verse_separator"}], []}, [curr | [%{"count" => c} | _] = acc] ->
[Map.put(curr, "count", c + 1) | acc]
# init verse break
{"hr", [{"class", "verse_separator"}], []}, [curr | acc] ->
[Map.put(curr, "count", 1) | acc]
# n case after verse break
{"div", [{"class", class}], _} = c_tree, [%{"count" => _} | _] = acc ->
[%{class => c_tree |> Floki.text()} | acc]
# n case before verse break
{"div", [{"class", class}], _} = c_tree, [curr | acc] when is_map(curr) ->
[Map.put(curr, class, c_tree |> Floki.text()) | acc]
# init
{"div", [{"class", class}], _} = c_tree, [] ->
[%{class => c_tree |> Floki.text()}]
# Nodes of any other shape are printed and skipped.
others, acc ->
IO.inspect(others)
acc
end)
# formatting & tying loose ends
# The last verse is not followed by a separator, so its count is set here.
clean_verses =
[Map.put(curr, "count", count + 1) | verses]
|> Enum.reverse()
%{acc | verses: clean_verses}
_, acc ->
acc
end)
# Serialise the scraped result and write it out as JSON.
contents = Poison.encode!(output)
# File.write!("chalisa_scraped.json", contents)
filename = "chalisa_scraped.json"
# NOTE: `url` is rebound here to a path relative to the current working directory.
url = "./Projects/vyasa/scripts/#{filename}"
File.write!(url, contents)
IO.puts(url)
# NOTE(review): `html`, `find_element/2` and `inner_text/1` are not defined
# anywhere in the visible file, so this snippet presumably came from another
# context — confirm before running it.
# Replaces every <div> with an attribute-less div containing "Modified Div".
Floki.traverse_and_update(html, fn
{"div", _, _} = node -> {"div", [], ["Modified Div"]}
node -> node
end)
# Reads the three verse fields by CSS class and gathers them into one map.
text = find_element(:class, "verse_sanskrit") |> inner_text()
transliteration = find_element(:class, "verse_trans") |> inner_text()
en_translation = find_element(:class, "verse_meaning") |> inner_text()
map = %{
verse_number: count,
text: text,
transliteration: transliteration,
en_translation: en_translation
}
Gita Events
# Look up the Gita source and build a verse number => verse id map for chapter 1.
gita = Vyasa.Written.get_source_by_title("Gita")
verses = Vyasa.Written.get_verses_in_chapter(1, gita.id)
verse_lookup = Enum.into(for(%{id: id, no: verse_no} <- verses, do: {verse_no, id}), %{})
# Parse the chapter recording to get its total duration.
c1_path = Path.expand("./1.mp3", "media/gita")
{:ok,
%Vyasa.Parser.MP3{
duration: tot_d,
path: p
}} = Vyasa.Parser.MP3.parse(c1_path)
# Register the recording as a voice; the events created below point at it.
{:ok, voice} =
Vyasa.Medium.create_voice(%{lang: "sa", duration: tot_d, file_path: c1_path, source_id: gita.id})
# Vyasa.Medium.get_voice!("4c73fb6d-4163-4b64-90d0-5d49680c1ee4")
# |> Vyasa.Medium.delete_voice()
# Timestamp table for the recording: one "<label> :- mm:ss" row per line.
"""
start :- 00:00
Shloka 1:- 00:33
Shloka 2 :- 00:49
Shloka 3:- 01:06
Shloka 4:- 01:19
Shloka 5:- 01:32
Shloka 6:- 01:46
Shloka 7:- 02:00
Shloka 8:- 02:15
Shloka 9:- 02:28
Shloka 10:- 02:42
Shloka 11:- 02:56
Shloka 12:- 03:09
Shloka 13:- 03:22
Shloka 14:- 03:36
Shloka 15:- 03:49
Shloka 16:- 04:02
Shloka 17:- 04:14
Shloka 18:- 04:27
Shloka 19:- 04:40
Shloka 20:- 04:54
Shloka 21:- 05:07
Shloka 22:- 05:23
Shloka 23:- 05:36
Shloka 24:- 05:50
Shloka 25:- 06:05
Shloka 26:- 06:18
Shloka 27:- 06:32
Shloka 28:- 06:46
Shloka 29:- 07:01
Shloka 30:- 07:13
Shloka 31:- 07:26
Shloka 32 :- 07:38
Shloka 33:- 07:52
Shloka 34 :- 08:05
Shloka 35 :- 08:18
Shloka 36 :- 08:31
Shloka 37:- 08:44
Shloka 38 :- 08:57
Shloka 39:- 09:09
Shloka 40:- 09:22
Shloka 41:- 09:35
Shloka 42:- 09:48
Shloka 43:- 10:02
Shloka 44:- 10:16
Shloka 45:- 10:29
Shloka 46:- 10:40
Shloka 47:- 10:53
end:- 11:08
"""
|> String.split("\n")
# Each row is split on ":-" and trimmed. The inner fold prepends, so a row comes
# out as [time, label]; a "Shloka N" label is replaced by the verse id looked up
# for N (nil when the lookup has no such verse).
|> Enum.map(fn x ->
x
|> String.split(":-")
|> Enum.map(&String.trim/1)
|> Enum.reduce([], fn
# "Shloka", one separator codepoint, then the verse number as the rest.
<<"Shloka"::utf8, sep::utf8, verse_no::binary>>, acc ->
[verse_lookup[String.to_integer(verse_no)] | acc]
bin, acc ->
[bin | acc]
end)
end)
|> IO.inspect(limit: :infinity)
# Turn the rows into events, newest first. Each new row also closes the previous
# event by setting its duration to the gap between the two origins.
|> Enum.reduce(
[],
fn
[time, "start"], acc ->
[%Vyasa.Medium.Event{origin: 0, phase: "start", voice_id: voice.id} | acc]
# The closing event runs from its timestamp to the end of the recording.
# `d` is in milliseconds; presumably `tot_d` is too — TODO confirm.
[time, "end"], [%{origin: o} = prev | acc] ->
[min, sec] = time |> String.split(":") |> Enum.map(&String.to_integer/1)
d = (min * 60 + sec) * 1000
[
%Vyasa.Medium.Event{origin: d, duration: tot_d - d, phase: "end", voice_id: voice.id}
| [%{prev | duration: d - o} | acc]
]
# A verse row: "mm:ss" is converted to milliseconds and used as the origin.
[time, id], [%{origin: o} = prev | acc] ->
[min, sec] = time |> String.split(":") |> Enum.map(&String.to_integer/1)
d = (min * 60 + sec) * 1000
[
%Vyasa.Medium.Event{origin: d, verse_id: id, voice_id: voice.id}
| [%{prev | duration: d - o} | acc]
]
# Rows of any other shape (such as the blank lines around the table) are skipped.
_, acc ->
acc
end
)
|> Enum.map(&Vyasa.Medium.create_event(&1))