Stock Returns Based On PE Ratios
# Packages installed for this notebook, with the version requirement of each.
notebook_deps = [
  {:nx, "~> 0.6.4"},
  {:explorer, "~> 0.8.0"},
  {:kino, "~> 0.12.2"},
  {:kino_vega_lite, "~> 0.1.11"},
  {:kino_explorer, "~> 0.1.15"},
  {:exla, "~> 0.6.4"},
  {:axon, "~> 0.6.0"}
]

# The :nx application is configured so that EXLA.Backend is the default
# tensor backend for everything evaluated afterwards.
Mix.install(notebook_deps, config: [nx: [default_backend: EXLA.Backend]])
Read In The CSVs
require Explorer.DataFrame, as: DF

# Every CSV is named `<year>-<month>-<day>.csv`; the file name is the only
# place the date of its rows is recorded.
parse_date = fn path ->
  [year, month, day] =
    path
    |> Path.basename(".csv")
    |> String.split("-")
    |> Enum.map(&String.to_integer/1)

  # Date.new!/3 raises on an impossible date instead of letting an error
  # value end up in the date column.
  Date.new!(year, month, day)
end

# Pair each file with its date and order the pairs oldest-first, so the row
# order of the concatenated data is chronological and does not depend on the
# order in which Path.wildcard/1 lists the files (file-name order is not date
# order when month/day are not zero-padded).
dated_paths =
  "~/Dropbox/stocks/*.csv"
  |> Path.expand()
  |> Path.wildcard()
  |> Enum.map(fn path -> {path, parse_date.(path)} end)
  |> Enum.sort_by(fn {_path, day} -> day end, Date)

# Kept as separate bindings, index-aligned with `dataframes` below.
{paths, dates} = Enum.unzip(dated_paths)

# One dataframe per file with columns ticker, price, pe_ratio and date.
# The files have no header row, so the three columns are named here.
dataframes =
  Enum.map(dated_paths, fn {path, day} ->
    df =
      path
      |> DF.from_csv!(header: false)
      |> DF.rename(["ticker", "price", "pe_ratio"])

    # A one-column frame repeating the file's date once per row of `df`.
    date_column = DF.new(date: List.duplicate(day, DF.n_rows(df)))

    df
    |> DF.concat_columns(date_column)
    # Keeps the column explicitly typed as :date.
    |> DF.mutate(%{date: cast(date, :date)})
  end)
Concat The Rows
require Explorer.DataFrame, as: DF

# Stack the per-file frames into one long frame, then store the ticker column
# as a categorical so its distinct values can be read back as categories.
total_df =
  dataframes
  |> DF.concat_rows()
  |> DF.mutate(%{ticker: cast(ticker, :category)})
Split By Ticker
require Explorer.DataFrame, as: DF

# The categories of the ticker column: one entry per distinct symbol.
tickers = Explorer.Series.categories(total_df["ticker"])

# Back to plain strings so the column can be compared with a string below.
total_df = DF.mutate(total_df, %{ticker: cast(ticker, :string)})

# One dataframe per symbol, holding only that symbol's rows.
# Inside the filter, `ticker` is the column and `^symbol` the Elixir value.
ticker_dfs =
  for symbol <- Explorer.Series.to_list(tickers) do
    DF.filter(total_df, ticker == ^symbol)
  end

# Number of per-ticker frames.
length(ticker_dfs)
Shift By 1 And Take Percent Change
require Explorer.DataFrame, as: DF

# Put the previous row's price next to each row; the first row of every
# frame has no predecessor, so its price_shifted is nil.
shifted_ticker_dfs =
  for df <- ticker_dfs do
    DF.mutate(df, %{price_shifted: Explorer.Series.shift(price, 1)})
  end

# Relative change of price against the previous row. DF.drop_nil/1 then
# removes every row that contains a nil in any column: that is the first row
# (nil price_shifted) plus any row with another missing value.
ticker_dfs_with_percentages =
  for df <- shifted_ticker_dfs do
    df
    |> DF.mutate(%{percent_change: (price - price_shifted) / price_shifted})
    |> DF.drop_nil()
  end
Shape The Data
prediction_days_text = Kino.Input.text("Num Prediction Days")

# Trimmed first so whitespace around the typed number does not make
# String.to_integer/1 raise.
prediction_days =
  prediction_days_text
  |> Kino.Input.read()
  |> String.trim()
  |> String.to_integer()

# Turns one ticker's dataframe into model inputs and targets.
#
# Returns `[x, y]` where
#   * x has shape {num_days, prediction_days, 3}: for every target row, the
#     `prediction_days` rows immediately before it, with the features
#     price, pe_ratio and percent_change (in that order);
#   * y has shape {num_days}: the percent_change of each target row.
#
# num_days is `rows - prediction_days`. Raises ArgumentError when the frame
# does not have more than `prediction_days` rows, because then not even one
# window fits. (A plain `prediction_days..(rows - 1)` range would silently
# count downwards in that case.)
slice_fun = fn df ->
  row_count = DF.n_rows(df)
  num_days = row_count - prediction_days

  if num_days < 1 do
    raise ArgumentError,
          "need more than #{prediction_days} rows to build a window, got #{row_count}"
  end

  # {rows, 3} feature matrix, converted to a tensor once instead of once per
  # window. Nx.stack/2 promotes the three columns to a common type.
  features =
    ["price", "pe_ratio", "percent_change"]
    |> Enum.map(fn column -> Explorer.Series.to_tensor(df[column]) end)
    |> Nx.stack(axis: 1)

  # Window k covers rows k..(k + prediction_days - 1); its target is row
  # k + prediction_days.
  x =
    0..(num_days - 1)
    |> Enum.map(fn start ->
      Nx.slice_along_axis(features, start, prediction_days, axis: 0)
    end)
    |> Nx.stack()

  # Targets: percent_change of rows prediction_days..(rows - 1).
  y =
    df["percent_change"]
    |> Explorer.Series.to_tensor()
    |> Nx.slice_along_axis(prediction_days, num_days, axis: 0)

  [x, y]
end

# Each entry is `[ticker_series, [x, y]]`. Frames too short to yield a single
# window are skipped rather than passed to slice_fun.
sliced_frames =
  ticker_dfs_with_percentages
  |> Enum.filter(fn df -> DF.n_rows(df) > prediction_days end)
  |> Enum.map(fn df -> [df["ticker"], slice_fun.(df)] end)
Train/Test Split
test_num_text = Kino.Input.text("Number of Test Cases")
require Explorer.DataFrame, as: DF
test_num = test_num_text |> Kino.Input.read() |> String.to_integer()

# Splits one ticker's tensors along the first axis: the last `test_num`
# windows become the test set and everything before them the training set.
#
# Returns `[[x_train, y_train], [x_test, y_test]]`, all cast to :f32.
# The cast is applied after slicing; it is element-wise, so casting before
# slicing would give the same values.
train_test_split_fun = fn x_tensors, y_tensors ->
  train_length = Nx.axis_size(x_tensors, 0) - test_num

  # Takes `rows` consecutive windows starting at `first_row`, with the
  # matching targets, and casts both to :f32.
  take_rows = fn first_row, rows ->
    features = Nx.slice(x_tensors, [first_row, 0, 0], [rows, prediction_days, 3])
    targets = Nx.slice(y_tensors, [first_row], [rows])
    [Nx.as_type(features, :f32), Nx.as_type(targets, :f32)]
  end

  [take_rows.(0, train_length), take_rows.(train_length, test_num)]
end

# Only tickers with more than `test_num` windows can be split, so that at
# least one window is left for training.
keep_entry = fn [_ticker_series, [x_tensor, _y_tensor]] ->
  Nx.axis_size(x_tensor, 0) > test_num
end

# Replaces the ticker series by its first element (the symbol) and the
# `[x, y]` pair by its train/test split.
split_entry = fn [ticker_series, [x, y]] ->
  [ticker_series[0], train_test_split_fun.(x, y)]
end

train_test_split =
  sliced_frames
  |> Enum.filter(keep_entry)
  |> Enum.map(split_entry)
Build The Model
# Take the first ticker that survived the train/test split and unpack its
# symbol and four tensors.
[ticker, [[first_item_x_train, first_item_y_train], [first_item_x_test, first_item_y_test]]] =
  List.first(train_test_split)

batch_size = 32

# Returns a fresh Axon graph: the {batch, prediction_days, 3} input is
# flattened, passed through dense(128, relu) -> batch norm -> dropout ->
# dense(64) -> tanh, and reduced to a single output unit.
build_model = fn ->
  "predictors"
  |> Axon.input(shape: {nil, prediction_days, 3})
  |> Axon.flatten()
  |> Axon.dense(128, activation: :relu)
  |> Axon.batch_norm()
  |> Axon.dropout(rate: 0.8)
  |> Axon.dense(64)
  |> Axon.tanh()
  |> Axon.dense(1)
end
Batch The Data
# Feature batches of `batch_size` windows each; an incomplete final batch is
# dropped (leftover: :discard).
batched_x_train = Nx.to_batched(first_item_x_train, batch_size, leftover: :discard)

# Targets get a trailing axis so every batch has shape {batch_size, 1}, the
# same shape the model's final dense(1) layer produces. With rank-1 targets
# the loss would compare {batch_size} against {batch_size, 1}, which either
# fails a shape check or broadcasts to {batch_size, batch_size}, depending on
# the loss implementation.
batched_y_train =
  first_item_y_train
  |> Nx.new_axis(-1)
  |> Nx.to_batched(batch_size, leftover: :discard)
Train The Model And Run It
# Built once; the same graph is used for the summary, training and prediction.
model = build_model.()

# Print a layer summary for one batch-sized input. The template is :f32
# because the train/test tensors are cast to :f32 before training.
IO.puts(Axon.Display.as_table(model, Nx.template({batch_size, prediction_days, 3}, :f32)))

# Train with mean squared error and the Adam optimizer for 25 epochs,
# reporting mean absolute error as "MAE". Each training step receives one
# {features, targets} batch pair from the zipped streams.
params =
  model
  |> Axon.Loop.trainer(:mean_squared_error, :adam)
  |> Axon.Loop.metric(:mean_absolute_error, "MAE")
  |> Axon.Loop.run(Stream.zip(batched_x_train, batched_y_train), %{}, epochs: 25, compiler: EXLA)

# Symbol of the ticker the model was trained on.
IO.puts(ticker)

# Predictions for the held-out test windows of that ticker.
Axon.predict(model, params, first_item_x_test)