Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Orb YouTube URL parser

examples/youtube-url-parser.livemd

Orb YouTube URL parser

Mix.install([
  {:orb, "~> 0.0.48"},
  {:wasmex, "~> 0.8.3"},
  {:mime, "~> 2.0.5"},
  {:kino, "~> 0.12.0"}
])

Define a helper module for parsing characters

# Helpers that build Orb snippets for matching a fixed run of characters
# (given as a charlist) against bytes in WebAssembly memory, starting at the
# offset held by a mutable reference.
defmodule CharParser do
  use Orb

  # Optionally consume `chars` at the current offset.
  #
  # - `chars`: a charlist such as ~c|http|. It should be non-empty:
  #   `Enum.reduce/2` is called without an initial accumulator and raises on
  #   an empty list.
  # - `offset_mut_ref`: a mutable reference (callers in this file pass
  #   `mut!(input_offset)`) exposing `.read` and `.write`.
  #
  # Builds one comparison per character (the U8 loaded at offset + index
  # against that character), combines them all with `I32.band/2`, and, when
  # the combined result is truthy, advances the offset by `length(chars)`.
  # When the characters do not match, the offset is not written.
  def may(chars, offset_mut_ref) when is_list(chars) do
    Orb.snippet do
      chars
      |> Enum.with_index(fn char, index ->
        # `index` is the position of `char` within `chars`.
        Memory.load!(I32.U8, offset_mut_ref.read + index) === char
      end)
      |> Enum.reduce(&I32.band/2)
      |> if do
        # Compute the advanced offset, then store it back through the ref.
        offset_mut_ref.read + length(chars)
        offset_mut_ref.write
      end
    end
  end

  # Require `chars` at the current offset.
  #
  # Delegates the matching (and the offset advance on success) to `may/2`;
  # `fail_instruction` is emitted in the `unless` branch, i.e. for the case
  # where the match did not succeed. Callers in this file pass a block
  # `break()` as `fail_instruction`.
  # NOTE(review): this relies on Orb accepting the snippet returned by
  # `may/2` as the condition of `unless` - confirm against the Orb docs.
  def must(chars, offset_mut_ref, fail_instruction) when is_list(chars) do
    Orb.snippet do
      unless may(chars, offset_mut_ref) do
        fail_instruction
      end
    end
  end

  # Future API?
  # CharParser.seq(mut!(input_offset), return(0), must: ~c|http|, may: ~c|s|, must: ~c|:|, may: ~c|//|)

  # Future API?
  # CharParser.one_of([
  #   [may: ~c|www.|, must: ~c|youtube.com|, must: ~c|/watch?v=|],
  #   [must: ~c|youtu.be/|]
  # ], mut!(input_offset), return(0))

  # Debugging aid: renders `ast` with `Orb.ToWat.to_wat/2` and prints the
  # resulting text to standard output.
  # NOTE(review): the "" second argument is presumably the starting
  # indentation - confirm.
  def print(ast) do
    IO.puts(Orb.ToWat.to_wat(ast, "") |> IO.iodata_to_binary())
  end
end

Define our module using Orb

# WebAssembly module (built with Orb) that recognises YouTube URLs of the
# forms exercised by this notebook (`youtube.com/watch?v=ID`, `youtu.be/ID`,
# with optional scheme, `//` and `www.`), plus an Elixir-side `run/1` helper
# that executes it through Wasmex.
defmodule YouTubeURLParser do
  use Orb

  # Two pages of memory; the input string is placed at the start of page 1.
  Memory.pages(2)
  defp start_page_offset(), do: 1

  # Exported so the host can ask where to write the input string.
  defw(input_offset(), I32, do: start_page_offset() * Memory.page_byte_size())

  # Parses the NUL-terminated string at the input offset.
  #
  # Returns 0 when the input is not a recognised URL; otherwise returns the
  # memory offset reached after the matched prefix (where the video ID starts).
  defw parse(), I32, input_offset: I32.UnsafePointer do
    input_offset = start_page_offset() * Memory.page_byte_size()

    # input_offset[~c|http|]

    # Optional scheme: "http", optionally "s", then ":". Any miss leaves the
    # block early.
    # NOTE(review): the offset is not rewound when "http" matched but ":" did
    # not, so an input such as "httpyoutu.be/x" appears to be accepted - confirm
    # whether that is intended.
    Control.block OptionalScheme do
      CharParser.must(~c|http|, mut!(input_offset), OptionalScheme.break())
      # unless CharParser.may(~c|http|, mut!(input_offset)), do: OptionalScheme.break()
      CharParser.may(~c|s|, mut!(input_offset))
      CharParser.must(~c|:|, mut!(input_offset), OptionalScheme.break())
    end

    CharParser.may(~c|//|, mut!(input_offset))

    # Leaving `Final` means success (falls through to returning the offset);
    # falling out of `ShortDomain` reaches `return(0)`.
    Control.block Final do
      Control.block ShortDomain do
        # Try the long form first. The offset is pushed before the attempt and
        # assigned back afterwards, presumably so that a failed long-form
        # attempt (e.g. after consuming "www.") does not affect the short-form
        # attempt below - TODO confirm Orb.Stack.push semantics.
        input_offset =
          Orb.Stack.push input_offset do
            Control.block FullDomain do
              CharParser.may(~c|www.|, mut!(input_offset))
              CharParser.must(~c|youtube.com|, mut!(input_offset), FullDomain.break())
              CharParser.must(~c|/watch?v=|, mut!(input_offset), FullDomain.break())
              # Reject an empty video ID (value at the current offset is zero).
              unless input_offset[at!: 0], do: FullDomain.break()
              Final.break()
            end
          end

        CharParser.must(~c|youtu.be/|, mut!(input_offset), ShortDomain.break())
        # Reject an empty video ID (value at the current offset is zero).
        unless input_offset[at!: 0], do: ShortDomain.break()
        Final.break()
      end

      return(0)
    end

    input_offset
  end

  # Compiles this module to WAT, instantiates it with Wasmex, writes `input`
  # (NUL-terminated) into memory at the exported input offset and calls
  # `parse`.
  #
  # - `input`: the URL to parse, as a binary. It must fit in the memory that
  #   follows the input offset.
  #
  # Returns `nil` when `parse` returns 0, otherwise the part of `input` that
  # follows the matched prefix (the video ID and anything after it).
  #
  # The Wasmex instance is started per call and is stopped again before
  # returning, including when one of the assertive matches below raises.
  def run(input) do
    {:ok, pid} = Wasmex.start_link(%{bytes: Orb.to_wat(__MODULE__)})

    try do
      {:ok, memory} = Wasmex.memory(pid)
      {:ok, store} = Wasmex.store(pid)

      {:ok, [input_offset]} = Wasmex.call_function(pid, :input_offset, [])
      Wasmex.Memory.write_binary(store, memory, input_offset, input <> "\0")

      {:ok, [result]} = Wasmex.call_function(pid, :parse, [])

      case result do
        0 ->
          nil

        offset ->
          # Read exactly the unparsed remainder of `input` rather than a fixed
          # 255 bytes, so long remainders are not truncated and the result does
          # not depend on the memory after the input being zeroed.
          remaining = byte_size(input) - (offset - input_offset)

          Wasmex.Memory.read_string(store, memory, offset, remaining)
          |> String.replace_trailing("\0", "")
      end
    after
      GenServer.stop(pid)
    end
  end
end

We can run the WebAssembly module in Elixir using Wasmex

# Uncomment to write the generated WebAssembly text to a file for inspection.
# source = Orb.to_wat(YouTubeURLParser)
# File.write!(Path.join([File.cwd!(), "Desktop", "youtube.wat"]), source)

# Run every sample URL through the parser and collect the results in order.
[
  "http://www.youtube.com/watch?v=hello",
  "https://www.youtube.com/watch?v=hello",
  "https://youtube.com/watch?v=hello",
  "https://youtu.be/hello",
  "//www.youtube.com/watch?v=hello",
  "//youtu.be/hello",
  "www.youtube.com/watch?v=hello",
  # These should fail
  "www.youtu.be/watch?v=hello",
  "https://www.youtube.com/watch",
  "https://www.youtube.com/watch?v=",
  "https://youtu.be/",
  "hello"
]
|> Enum.map(&YouTubeURLParser.run/1)