Recurrent Neural Networks

lib/RNN.livemd

Mix.install([
  {:scidata, "~> 0.1"},
  {:axon, "~> 0.5"},
  {:exla, "~> 0.6"},
  {:nx, "~> 0.6"},
  {:table_rex, "~> 3.1.1"},
  {:kino, "~> 0.7"}
])

Main

Nx.default_backend(EXLA.Backend)

# Download the IMDB movie review dataset
data = Scidata.IMDBReviews.download()

# Pair each review with its sentiment label, shuffle, and split into
# training and test sets
{train_data, test_data} =
  data.review
  |> Enum.zip(data.sentiment)
  |> Enum.shuffle()
  |> Enum.split(23_000)
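
Before building a vocabulary, it can help to peek at one training pair. This check is illustrative and not part of the original notebook; the sentiment labels are assumed to be the integer labels Scidata provides for positive and negative reviews.

# Illustrative sanity check: the start of one review and its label
{sample_review, sample_label} = hd(train_data)
IO.puts(String.slice(sample_review, 0, 80))
IO.inspect(sample_label, label: "sentiment")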

# Tokenization and vectorization: first, count how often each word appears
# in the training reviews (lowercased, with punctuation and symbols stripped)
frequencies =
  Enum.reduce(train_data, %{}, fn {review, _label}, counts ->
    review
    |> String.downcase()
    |> String.replace(~r/[\p{P}\p{S}]/, "")
    |> String.split()
    |> Enum.reduce(counts, &Map.update(&2, &1, 1, fn count -> count + 1 end))
  end)
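
A quick, optional peek at the frequency map shows which words dominate the corpus. This snippet is illustrative and not part of the original notebook; the exact counts will vary slightly with the shuffle above.

# Illustrative: the five most frequent words and their counts
frequencies
|> Enum.sort_by(&elem(&1, 1), :desc)
|> Enum.take(5)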

# Keep only the 1,024 most frequent words as the vocabulary
num_tokens = 1024

# An example review to run through the tokenizer defined below
review = "The Departed is Martin Scorsese's best work, and anybody who disagrees is wrong. This movie is amazing."

# Index assigned to words that fall outside the vocabulary
unknown_token = 0

# Map the num_tokens most frequent words to integer indices
tokens =
  frequencies
  |> Enum.sort_by(&elem(&1, 1), :desc)
  |> Enum.take(num_tokens)
  |> Enum.with_index(fn {token, _count}, i -> {token, i} end)
  |> Map.new()
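
To see the mapping in action, a couple of lookups (illustrative; the exact indices depend on the corpus):

# A frequent word maps to a small index; a rare word falls back to unknown_token
{Map.get(tokens, "movie", unknown_token), Map.get(tokens, "scorseses", unknown_token)}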

# Turn a review into a tensor of token indices, mapping words that are
# not in the vocabulary to unknown_token
tokenize = fn review ->
  review
  |> String.downcase()
  |> String.replace(~r/[\p{P}\p{S}]/, "")
  |> String.split()
  |> Enum.map(&Map.get(tokens, &1, unknown_token))
  |> Nx.tensor()
end

tokenize.(review)
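
Tokenized reviews vary in length, so before they can be batched and fed to a recurrent model they need to be truncated or padded to a fixed sequence length. The snippet below is a minimal sketch of that step, followed by the kind of Axon LSTM classifier this data could feed. The `pad` helper, the sequence length of 64, the embedding size, and the layer sizes are illustrative assumptions, not values from the original notebook.

# Sketch: pad or truncate a tokenized review to an assumed fixed length
seq_len = 64

pad = fn token_tensor ->
  len = Nx.size(token_tensor)

  if len >= seq_len do
    Nx.slice(token_tensor, [0], [seq_len])
  else
    Nx.pad(token_tensor, unknown_token, [{0, seq_len - len, 0}])
  end
end

pad.(tokenize.(review))

# Sketch: an LSTM-based sentiment classifier in Axon (layer sizes are illustrative)
{rnn_sequence, _hidden_state} =
  Axon.input("review", shape: {nil, seq_len})
  |> Axon.embedding(num_tokens, 64)
  |> Axon.lstm(64)

model =
  rnn_sequence
  # keep only the LSTM output at the final timestep
  |> Axon.nx(fn out ->
    out
    |> Nx.slice_along_axis(seq_len - 1, 1, axis: 1)
    |> Nx.squeeze(axes: [1])
  end)
  |> Axon.dense(1)
  |> Axon.sigmoid()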