Stock Returns Based On PE Ratios

Mix.install(
  [
    {:nx, "~> 0.6.4"},
    {:explorer, "~> 0.8.0"},
    {:kino, "~> 0.12.2"},
    {:kino_vega_lite, "~> 0.1.11"},
    {:kino_explorer, "~> 0.1.15"},
    {:exla, "~> 0.6.4"},
    {:axon, "~> 0.6.0"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)

Read In The CSVs

require Explorer.DataFrame, as: DF

paths =
  "~/Dropbox/stocks/*.csv"
  |> Path.expand()
  |> Path.wildcard()

dates =
  Enum.map(paths, fn path ->
    # Each file is named after its date, e.g. "2023-01-15.csv".
    [year, month, day] =
      path
      |> Path.basename(".csv")
      |> String.split("-")
      |> Enum.map(&String.to_integer/1)

    Date.new!(year, month, day)
  end)

dataframes = Enum.map(paths, fn path -> Explorer.DataFrame.from_csv!(path, header: false) end)

dataframes =
  Enum.map(dataframes, fn df -> Explorer.DataFrame.rename(df, ["ticker", "price", "pe_ratio"]) end)

date_lists =
  Enum.with_index(dataframes, fn df, i ->
    # Repeat this file's date once per row so it can be attached as a column.
    List.duplicate(Enum.at(dates, i), Explorer.Series.count(df["ticker"]))
  end)

date_series = Enum.map(date_lists, fn date_list -> Explorer.DataFrame.new(date: date_list) end)

dataframes =
  Enum.with_index(dataframes, fn df, i -> DF.concat_columns(df, Enum.at(date_series, i)) end)

dataframes =
  Enum.map(dataframes, fn df ->
    DF.mutate(df, %{date: cast(date, :date)})
  end)

Concat The Rows

require Explorer.DataFrame, as: DF

total_df = DF.concat_rows(dataframes)
total_df = DF.mutate(total_df, %{ticker: cast(ticker, :category)})

Split By Ticker

require Explorer.DataFrame, as: DF

tickers = Explorer.Series.categories(total_df["ticker"])
total_df = DF.mutate(total_df, %{ticker: cast(ticker, :string)})

ticker_dfs =
  Enum.map(Explorer.Series.to_list(tickers), fn ticker ->
    DF.filter(total_df, ticker == ^ticker)
  end)

length(ticker_dfs)

Shift By 1 And Take Percent Change

require Explorer.DataFrame, as: DF

shifted_ticker_dfs =
  Enum.map(ticker_dfs, fn df ->
    DF.mutate(df, %{price_shifted: Explorer.Series.shift(price, 1)})
  end)

ticker_dfs_with_percentages =
  Enum.map(shifted_ticker_dfs, fn df ->
    DF.mutate(df, %{percent_change: (price - price_shifted) / price_shifted})
  end)

# drop the first row of each frame: its shifted price is nil, so percent_change is nil
ticker_dfs_with_percentages = Enum.map(ticker_dfs_with_percentages, fn df -> DF.drop_nil(df) end)
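
As a worked example of the percent_change formula above: a price that moves from 100.0 on one day to 110.0 on the next gives (110.0 - 100.0) / 100.0 = 0.1, a 10% gain.

# Quick arithmetic check of the percent_change formula (not part of the pipeline).
(110.0 - 100.0) / 100.0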

Shape The Data

prediction_days_text = Kino.Input.text("Num Prediction Days")
prediction_days = String.to_integer(Kino.Input.read(prediction_days_text))

slice_fun = fn df ->
  # Build one sliding window per predictable day: the `prediction_days` rows
  # immediately before day `i` become the features used to predict day `i`.
  x =
    for i <- prediction_days..(elem(DF.shape(df), 0) - 1) do
      df
      |> DF.select(["price", "pe_ratio", "percent_change"])
      |> DF.slice((i - prediction_days)..(i - 1))
    end

  # Turn each window into a {prediction_days, 3} tensor with columns
  # [price, pe_ratio, percent_change].
  x =
    Enum.map(
      x,
      fn window_df ->
        Nx.concatenate(
          [
            Nx.reshape(Explorer.Series.to_tensor(window_df["price"]), {:auto, 1}),
            Nx.reshape(Explorer.Series.to_tensor(window_df["pe_ratio"]), {:auto, 1}),
            Nx.reshape(Explorer.Series.to_tensor(window_df["percent_change"]), {:auto, 1})
          ],
          axis: 1
        )
      end
    )

  # Concatenate all window tensors into one flat binary, then rebuild a single
  # f64 tensor from it.
  x = Enum.into(x, <<>>, fn tensor -> Nx.to_binary(tensor) end)
  x = Nx.from_binary(x, :f64)

  # The target for each window is the percent change on the day that follows it.
  y =
    for i <- prediction_days..(elem(DF.shape(df), 0) - 1) do
      Explorer.Series.to_tensor(DF.slice(df, [i])["percent_change"])
    end

  num_days = length(y)
  x = Nx.reshape(x, {num_days, prediction_days, 3})
  y = Nx.concatenate(y)

  [x, y]
end

sliced_frames = Enum.map(ticker_dfs_with_percentages, fn df -> [df["ticker"], slice_fun.(df)] end)
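
Before splitting, it can help to sanity-check the shapes slice_fun produces. The sketch below is an optional addition: for a ticker with n rows, x should be {n - prediction_days, prediction_days, 3} and y should be {n - prediction_days}.

# Optional shape check for the first ticker (assumes sliced_frames is non-empty).
[_ticker_series, [x_example, y_example]] = hd(sliced_frames)
{Nx.shape(x_example), Nx.shape(y_example)}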

Train/Test Split

test_num_text = Kino.Input.text("Number of Test Cases")
require Explorer.DataFrame, as: DF

test_num = String.to_integer(Kino.Input.read(test_num_text))

train_test_split_fun = fn x_tensors, y_tensors ->
  train_length = elem(Nx.shape(x_tensors), 0) - test_num

  # The train and test slices are cast to :f32 below, so there is no need to
  # cast x_tensors and y_tensors up front here.

  [x_train, y_train] = [
    Nx.slice(x_tensors, [0, 0, 0], [train_length, prediction_days, 3]),
    Nx.slice(y_tensors, [0], [train_length])
  ]

  [x_test, y_test] = [
    Nx.slice(x_tensors, [train_length, 0, 0], [test_num, prediction_days, 3]),
    Nx.slice(y_tensors, [train_length], [test_num])
  ]

  [
    [
      Nx.as_type(x_train, :f32),
      Nx.as_type(y_train, :f32)
    ],
    [
      Nx.as_type(x_test, :f32),
      Nx.as_type(y_test, :f32)
    ]
  ]
end

train_test_split =
  sliced_frames
  |> Enum.filter(fn [_, [x_tensor, _]] -> elem(Nx.shape(x_tensor), 0) > test_num end)
  |> Enum.map(fn [tickers, [x, y]] -> [tickers[0], train_test_split_fun.(x, y)] end)
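
Tickers without enough history to supply test_num test windows are filtered out above. A quick optional check shows how many tickers made it into the split.

# Number of tickers that had more than test_num windows.
length(train_test_split)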

Build The Model

[ticker, [[first_item_x_train, first_item_y_train], [first_item_x_test, first_item_y_test]]] =
  train_test_split |> Enum.at(0)

batch_size = 32

build_model = fn ->
  input = Axon.input("predictors", shape: {nil, prediction_days, 3})

  input
  |> Axon.flatten()
  |> Axon.dense(128, activation: :relu)
  |> Axon.batch_norm()
  |> Axon.dropout(rate: 0.8)
  |> Axon.dense(64)
  |> Axon.tanh()
  |> Axon.dense(1)
end

Batch The Data

batched_x_train =
  first_item_x_train
  |> Nx.to_batched(batch_size, leftover: :discard)

batched_y_train =
  first_item_y_train
  |> Nx.to_batched(batch_size, leftover: :discard)
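
A small optional sanity check: with leftover: :discard, every batch should hold exactly batch_size examples, shaped {batch_size, prediction_days, 3} for the inputs and {batch_size} for the targets.

# Nx.to_batched/3 returns lazy streams, so Enum.at/2 realizes only the batch we ask for.
{Nx.shape(Enum.at(batched_x_train, 0)), Nx.shape(Enum.at(batched_y_train, 0))}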

Train The Model And Run It

model = build_model.()
IO.puts(Axon.Display.as_table(model, Nx.template({batch_size, prediction_days, 3}, :f32)))

params =
  model
  |> Axon.Loop.trainer(:mean_squared_error, :adam)
  |> Axon.Loop.metric(:mean_absolute_error, "MAE")
  |> Axon.Loop.run(Stream.zip(batched_x_train, batched_y_train), %{}, epochs: 25, compiler: EXLA)
IO.puts(ticker)
Axon.predict(model, params, first_item_x_test)
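
As a minimal follow-up sketch (not part of the original notebook, and assuming the bindings above), the test-set predictions can be compared against the actual next-day percent changes with a simple mean absolute error:

# Flatten the {test_num, 1} predictions and compare them with the f32 targets.
predictions = Axon.predict(model, params, first_item_x_test)

test_mae =
  predictions
  |> Nx.flatten()
  |> Nx.subtract(first_item_y_test)
  |> Nx.abs()
  |> Nx.mean()
  |> Nx.to_number()

IO.puts("Test MAE for #{ticker}: #{test_mae}")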