Orb YouTube URL parser
Mix.install([
{:orb, "~> 0.0.48"},
{:wasmex, "~> 0.8.3"},
{:mime, "~> 2.0.5"},
{:kino, "~> 0.12.0"}
])
Define a helper module for parsing characters
# Helpers that build Orb snippets for matching a fixed sequence of characters
# in WebAssembly memory at an offset held in a mutable reference.
# The functions run at Elixir compile time: `chars` is an ordinary Elixir
# charlist that is unrolled into one byte comparison per character.
defmodule CharParser do
  use Orb
  # Optionally consume `chars` at the current offset.
  #
  # * `chars` - charlist to match (enforced by the `is_list/1` guard).
  # * `offset_mut_ref` - mutable reference to the offset; callers in this file
  #   pass `mut!(input_offset)`.
  #
  # For each character, the snippet loads an unsigned byte at
  # `offset + index` and compares it with the expected character. The
  # comparisons are combined with `I32.band/2`, so the `if` body runs only
  # when every byte matched. The body computes `offset + length(chars)` and
  # then invokes `offset_mut_ref.write`.
  # NOTE(review): presumably `.write` stores the value produced by the
  # preceding expression back into the referenced local - confirm against Orb.
  # `Enum.reduce/2` without an initial accumulator raises on an empty list, so
  # `chars` must contain at least one character.
  def may(chars, offset_mut_ref) when is_list(chars) do
    Orb.snippet do
      chars
      |> Enum.with_index(fn char, index ->
        Memory.load!(I32.U8, offset_mut_ref.read + index) === char
      end)
      |> Enum.reduce(&I32.band/2)
      |> if do
        offset_mut_ref.read + length(chars)
        offset_mut_ref.write
      end
    end
  end
  # Require `chars` at the current offset.
  #
  # Takes the same `chars` and `offset_mut_ref` as `may/2`, plus
  # `fail_instruction`, which is placed in the body of an `unless` whose
  # condition is the `may/2` snippet. Callers in this file pass a block break
  # such as `OptionalScheme.break()`.
  # NOTE(review): this relies on the `may/2` snippet being usable as a
  # condition inside `Orb.snippet` - verify how Orb evaluates it.
  def must(chars, offset_mut_ref, fail_instruction) when is_list(chars) do
    Orb.snippet do
      unless may(chars, offset_mut_ref) do
        fail_instruction
      end
    end
  end
  # Future API?
  # CharParser.seq(mut!(input_offset), return(0), must: ~c|http|, may: ~c|s|, must: ~c|:|, may: ~c|//|)
  # Future API?
  # CharParser.one_of([
  # [may: ~c|www.|, must: ~c|youtube.com|], must: ~c|/watch?v=|],
  # [must: ~c|youtu.be/|]
  # ], mut!(input_offset), return(0))
  # Debugging aid: renders `ast` with `Orb.ToWat.to_wat/2`, flattens the
  # resulting iodata to a binary and prints it to standard output.
  # NOTE(review): the `""` argument is presumably the starting indentation.
  def print(ast) do
    IO.puts(Orb.ToWat.to_wat(ast, "") |> IO.iodata_to_binary())
  end
end
Define our module using Orb
defmodule YouTubeURLParser do
  @moduledoc """
  WebAssembly module, written with Orb, that recognises YouTube video URLs.

  The input is a NUL-terminated string placed in memory at the address
  returned by the exported `input_offset` function. The exported `parse`
  function returns `0` when the input is not recognised, otherwise the memory
  address at which the video id starts.

  `run/1` is an Elixir-side convenience that instantiates the module with
  Wasmex, writes the input, calls `parse`, and reads the id back.
  """
  use Orb

  # Two pages of memory are declared; the input is written at the start of
  # page index 1 (see `start_page_offset/0`).
  Memory.pages(2)

  defp start_page_offset(), do: 1

  # Exported so the host can ask where to write the input string.
  defw(input_offset(), I32, do: start_page_offset() * Memory.page_byte_size())

  # Returns 0 on failure, otherwise the address of the first byte after the
  # recognised URL prefix. `input_offset` here is a local that shadows the
  # exported function of the same name and acts as the parse cursor.
  defw parse(), I32, input_offset: I32.UnsafePointer do
    input_offset = start_page_offset() * Memory.page_byte_size()

    # input_offset[~c|http|]
    # Optional scheme: "http", optional "s", then ":". Leaving the block early
    # simply skips the rest of the scheme.
    # NOTE(review): nothing in this block restores the cursor, so an input such
    # as "httpwww.youtube.com/..." appears to continue parsing after "http" -
    # confirm whether that is intended.
    Control.block OptionalScheme do
      CharParser.must(~c|http|, mut!(input_offset), OptionalScheme.break())
      # unless CharParser.may(~c|http|, mut!(input_offset)), do: OptionalScheme.break()
      CharParser.may(~c|s|, mut!(input_offset))
      CharParser.must(~c|:|, mut!(input_offset), OptionalScheme.break())
    end

    # The "//" is optional too, so scheme-relative and bare-host URLs both work.
    CharParser.may(~c|//|, mut!(input_offset))

    # Breaking out of `Final` means success; falling out of `ShortDomain`
    # reaches `return(0)`, which means failure.
    Control.block Final do
      Control.block ShortDomain do
        # Try the long form first. The cursor is pushed before the attempt and
        # assigned back afterwards.
        # NOTE(review): presumably `Orb.Stack.push` yields the pushed value once
        # its block finishes, rewinding the cursor after a failed attempt -
        # confirm against Orb.
        input_offset =
          Orb.Stack.push input_offset do
            Control.block FullDomain do
              CharParser.may(~c|www.|, mut!(input_offset))
              CharParser.must(~c|youtube.com|, mut!(input_offset), FullDomain.break())
              CharParser.must(~c|/watch?v=|, mut!(input_offset), FullDomain.break())
              # The byte at the cursor must be non-zero, i.e. the id is not empty.
              unless input_offset[at!: 0], do: FullDomain.break()
              Final.break()
            end
          end

        # Short form: "youtu.be/" followed by a non-empty id.
        CharParser.must(~c|youtu.be/|, mut!(input_offset), ShortDomain.break())
        unless input_offset[at!: 0], do: ShortDomain.break()
        Final.break()
      end

      return(0)
    end

    input_offset
  end

  @doc """
  Parses `input` with a fresh WebAssembly instance.

  Returns the text following the recognised URL prefix (the video id), or
  `nil` when `parse` reports failure by returning `0`.

  At most 255 bytes are read back from memory, and trailing NUL bytes are
  stripped from the result.

  The Wasmex instance is stopped before returning, including when a step
  raises, so repeated calls do not accumulate running instances.
  """
  @spec run(String.t()) :: String.t() | nil
  def run(input) do
    {:ok, pid} = Wasmex.start_link(%{bytes: Orb.to_wat(__MODULE__)})

    try do
      {:ok, memory} = Wasmex.memory(pid)
      {:ok, store} = Wasmex.store(pid)
      {:ok, [input_offset]} = Wasmex.call_function(pid, :input_offset, [])
      # NUL-terminate the input; `parse` treats a zero byte as the end of input.
      Wasmex.Memory.write_binary(store, memory, input_offset, input <> "\0")
      {:ok, [result]} = Wasmex.call_function(pid, :parse, [])

      case result do
        0 ->
          nil

        offset ->
          Wasmex.Memory.read_string(store, memory, offset, 255)
          |> String.replace_trailing("\0", "")
      end
    after
      # The instance is linked to the caller and would otherwise stay alive
      # for as long as the caller does.
      GenServer.stop(pid)
    end
  end
end
We can run the WebAssembly module in Elixir using Wasmex
# source = Orb.to_wat(YouTubeURLParser)
# File.write!(Path.join([File.cwd!(), "Desktop", "youtube.wat"]), source)
# Run the parser over a fixed set of sample inputs; the expression evaluates
# to the list of results in the same order as the inputs.
Enum.map(
  [
    "http://www.youtube.com/watch?v=hello",
    "https://www.youtube.com/watch?v=hello",
    "https://youtube.com/watch?v=hello",
    "https://youtu.be/hello",
    "//www.youtube.com/watch?v=hello",
    "//youtu.be/hello",
    "www.youtube.com/watch?v=hello",
    # These should fail
    "www.youtu.be/watch?v=hello",
    "https://www.youtube.com/watch",
    "https://www.youtube.com/watch?v=",
    "https://youtu.be/",
    "hello"
  ],
  fn url -> YouTubeURLParser.run(url) end
)