Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

NHL Games

NHL_001.livemd

NHL Games

Load data

Data source:

Mix.install([
  {:vega_lite, "~> 0.1.3"},
  {:kino, "~> 0.5.0"},
  {:jason, "~> 1.2"},
  {:explorer, "~> 0.1.0-dev", github: "elixir-nx/explorer", branch: "main"}
])

Aliases to reduce typing:

alias Explorer.DataFrame, as: DF
alias Explorer.Series, as: S
alias Explorer.Datasets, as: DS

Relationships across tables:

Table relationships

# Load the raw game table. The match raises if the read does not return {:ok, _}.
{:ok, games} = DF.read_csv("~/Data/NHL/game.csv")

# Parse the "date_time_GMT" strings into NaiveDateTime values.
# NaiveDateTime.from_iso8601/1 returns only {:ok, naive_datetime} or
# {:error, reason}. The three-element {:ok, dt, offset} shape belongs to
# DateTime.from_iso8601/1, so a clause for it could never match here.
new_dt =
  games["date_time_GMT"]
  |> S.to_list()
  |> Enum.map(fn s ->
    case NaiveDateTime.from_iso8601(s) do
      {:ok, dt} -> dt
      # Anything else is logged and passed through unchanged.
      other -> IO.inspect(other, label: "unprocessable")
    end
  end)
  |> S.from_list()

games = DF.mutate(games, date_time_GMT: new_dt)
DF.table(games)
# Load the team metadata and show it as a table.
{:ok, teams} = DF.read_csv("~/Data/NHL/team_info.csv")
DF.table(teams, 33)

# Keyword list of column name => column values, for plain-Elixir access.
kw_teams = Keyword.new(DF.to_map(teams))

# One %{abbrev: ..., name: ...} map per team; name is "<shortName> <teamName>".
for {abbrev, short_name, team_name} <-
      Enum.zip([kw_teams[:abbreviation], kw_teams[:shortName], kw_teams[:teamName]]) do
  %{abbrev: abbrev, name: short_name <> " " <> team_name}
end

DF.head(teams)
# Two-column lookup table: team id and abbreviation.
abbrvs = DF.select(teams, ["team_id", "abbreviation"])
abbrvs["abbreviation"]

# One copy of the lookup per side, with the columns renamed for that side.
# NOTE(review): presumably the joins below match on the shared
# "home_team_id" / "away_team_id" columns — confirm against DF.join/3 defaults.
home_teams = abbrvs |> DF.rename(team_id: "home_team_id") |> DF.rename(abbreviation: "home_team")
away_teams = abbrvs |> DF.rename(team_id: "away_team_id") |> DF.rename(abbreviation: "away_team")

# Attach both abbreviations and add home_goal_diff = home_goals - away_goals.
games =
  games
  |> DF.join(away_teams)
  |> DF.join(home_teams)
  |> DF.mutate(home_goal_diff: &S.subtract(&1["home_goals"], &1["away_goals"]))

# Per home team: min/mean/max goal differential and number of distinct outcomes.
games
|> DF.group_by("home_team")
|> DF.summarise(home_goal_diff: [:min, :mean, :max], outcome: [:n_unique])
defmodule NHL.Games do
  @moduledoc """
  Builds the per-game data frame used in this notebook and filters it by team.

  The game and team CSV files are read with `Explorer.DataFrame`, team
  abbreviations are attached for the home and away sides, and a
  `home_goal_diff` column (home goals minus away goals) is added.
  """

  # Declared locally so the module does not depend on aliases from the
  # surrounding notebook cell.
  alias Explorer.DataFrame, as: DF
  alias Explorer.Series, as: S

  @game_data "~/Data/NHL/game.csv"
  @teams_data "~/Data/NHL/team_info.csv"

  @doc """
  Reads the raw CSV files and returns the enriched games data frame.

  `game_data` and `teams_data` are the paths of the game and team-info CSV
  files. A read that does not return `{:ok, data_frame}` raises a `MatchError`.

  The result has `date_time_GMT` parsed and a `date` string column added (see
  `convert_date_time_gmt/1`), `away_team` and `home_team` abbreviation columns
  joined in, and a `home_goal_diff` column.
  """
  def create_from_raw_data(game_data \\ @game_data, teams_data \\ @teams_data) do
    {:ok, games} = DF.read_csv(game_data)
    games = convert_date_time_gmt(games)
    {:ok, teams} = DF.read_csv(teams_data)
    abbrvs = DF.select(teams, ["team_id", "abbreviation"])

    home_teams = team_lookup(abbrvs, "home_team_id", "home_team")
    away_teams = team_lookup(abbrvs, "away_team_id", "away_team")

    games
    |> DF.join(away_teams)
    |> DF.join(home_teams)
    |> DF.mutate(home_goal_diff: &S.subtract(&1["home_goals"], &1["away_goals"]))
  end

  @doc """
  Replaces the `date_time_GMT` strings with parsed `NaiveDateTime` values and
  adds a `date` column holding the calendar date of each value as a string.

  A string that cannot be parsed is logged with the label `"unprocessable"`
  and the parse result is passed through unchanged.
  """
  def convert_date_time_gmt(games) do
    parsed =
      games["date_time_GMT"]
      |> S.to_list()
      |> Enum.map(&parse_naive_datetime/1)

    games = DF.mutate(games, date_time_GMT: S.from_list(parsed))

    # The parsed values are already in hand, so the date strings are derived
    # from them directly instead of reading the column back out of the frame.
    new_date_string =
      Enum.map(parsed, fn dt -> dt |> NaiveDateTime.to_date() |> Date.to_string() end)

    DF.mutate(games, date: new_date_string)

    # |> DF.mutate(date_time_GMT: &S.cast(&1["date_time_GMT"], :datetime))
    # |> DF.mutate(date_time_GMT: &S.cast(&1["date_time_GMT"], :date))
  end

  @doc """
  Returns a list of booleans marking the rows where `home_team` equals `team_abbrv`.
  """
  def home_mask(df, team_abbrv) do
    df["home_team"] |> S.equal(team_abbrv) |> S.to_list()
  end

  @doc """
  Returns a list of booleans marking the rows where `away_team` equals `team_abbrv`.
  """
  def away_mask(df, team_abbrv) do
    df["away_team"] |> S.equal(team_abbrv) |> S.to_list()
  end

  @doc """
  Keeps only the rows where `home_team` equals `team_abbrv`.
  """
  def filter_home_games(df, team_abbrv) do
    DF.filter(df, &S.equal(&1["home_team"], team_abbrv))
  end

  @doc """
  Keeps only the rows where `away_team` equals `team_abbrv`.
  """
  def filter_away_games(df, team_abbrv) do
    DF.filter(df, &S.equal(&1["away_team"], team_abbrv))
  end

  # Renames the id/abbreviation lookup columns for one side (home or away).
  defp team_lookup(abbrvs, id_column, name_column) do
    abbrvs |> DF.rename(team_id: id_column) |> DF.rename(abbreviation: name_column)
  end

  # NaiveDateTime.from_iso8601/1 returns {:ok, naive_datetime} or
  # {:error, reason}; it never returns the three-element tuple that
  # DateTime.from_iso8601/1 does, so only the two-element success is matched.
  defp parse_naive_datetime(string) do
    case NaiveDateTime.from_iso8601(string) do
      {:ok, naive_datetime} -> naive_datetime
      other -> IO.inspect(other, label: "unprocessable")
    end
  end
end
# Rebuild `games` through the module defined above.
games = NHL.Games.create_from_raw_data()

# Earlier inline ways of selecting the "BOS" home games:
# bos_home_mask = games["home_team"] |> S.equal("BOS") |> S.to_list()
# bos_home_mask = games |> NHL.Games.home_mask("BOS")
# bos_home_games = games |> DF.filter(&S.equal(&1["home_team"], "BOS"))
bos_home_games = NHL.Games.filter_home_games(games, "BOS")
bos_away_games = NHL.Games.filter_away_games(games, "BOS")

alias VegaLite, as: Vl

# All games in which "BOS" is either the home or the away team.
bos_games = DF.concat_rows(bos_home_games, bos_away_games)
# Date and home goal differential of every "BOS" game, as a keyword list of columns.
bos =
  bos_games
  |> DF.select(["date_time_GMT", "home_goal_diff"])
  |> DF.mutate(date_time_GMT: &S.cast(&1["date_time_GMT"], :date))
  # |> DF.mutate(date_time_GMT: &S.cast(&1["date_time_GMT"], :string))
  |> DF.to_map()
  |> Keyword.new()

# The dates are turned into ISO 8601 strings before being handed to VegaLite.
bos =
  bos
  |> Keyword.put(
    :date_time_GMT,
    bos[:date_time_GMT]
    |> Enum.map(&Date.to_string/1)
  )

# Home goal differential over time.
Vl.new(height: 600, width: 1200)
|> Vl.data_from_series(bos)
|> Vl.mark(:point)
|> Vl.encode_field(:x, "date_time_GMT", type: :temporal)
|> Vl.encode_field(:y, "home_goal_diff", type: :quantitative)
# Every column of the "BOS" games whose season is 20182019, as a keyword list.
bos_201819 =
  bos_games
  # |> DF.select(["date_time_GMT", "home_goal_diff", "season"])
  |> DF.filter(&S.equal(&1["season"], 20_182_019))
  |> DF.mutate(date_time_GMT: &S.cast(&1["date_time_GMT"], :date))
  # |> DF.mutate(date_time_GMT: &S.cast(&1["date_time_GMT"], :string))
  |> DF.to_map()
  |> Keyword.new()

# The dates are turned into ISO 8601 strings before being handed to VegaLite.
bos_201819 =
  bos_201819
  |> Keyword.put(
    :date_time_GMT,
    bos_201819[:date_time_GMT]
    |> Enum.map(&Date.to_string/1)
  )

# Home goal differential over that season; the tooltip shows the full data row.
Vl.new(height: 600, width: 1200)
|> Vl.data_from_series(bos_201819)
|> Vl.mark(:point, tooltip: %{content: "data"})
|> Vl.encode_field(:x, "date_time_GMT", type: :temporal)
|> Vl.encode_field(:y, "home_goal_diff", type: :quantitative)
# buf_home_games = games |> DF.filter(&S.equal(&1["home_team"], "BUF"))
# buf_home_games = games |> NHL.Games.filter_home_games("BUF")
# mtl_home_games = games |> DF.filter(&S.equal(&1["home_team"], "MTL"))
# mtl_home_games = games |> NHL.Games.filter_home_games("MTL")
# bruins_canadiens_sabres = DF.concat_rows([mtl_home_games, bos_home_games, buf_home_games])
# dat = bruins_canadiens_sabres |> DF.to_map() |> Keyword.new()

First and last games:

# Sorted game timestamps; the first and last entries bound the data set.
timestamps = S.sort(games["date_time_GMT"])
sorted = S.to_list(timestamps)
%{first_game: List.first(sorted), last_game: List.last(sorted)}
# dat
# |> Keyword.fetch!(:date_time_GMT)
# |> Enum.sort()
# |> (fn list -> %{first_game: List.first(list), last_game: List.last(list)} end).()

# The "outcome" column on its own, as a keyword list for VegaLite.
outcome_columns = games |> DF.select(["outcome"]) |> DF.to_map()
outcomes = Keyword.new(outcome_columns)

Based on

# Bar chart: number of games per outcome value.
[height: 600, width: 600]
|> Vl.new()
|> Vl.data_from_series(outcomes)
|> Vl.mark(:bar, filled: true)
|> Vl.encode(:x, field: "outcome", type: :nominal)
|> Vl.encode(:y, field: "outcome", aggregate: :count)
# mtl_dat = mtl_home_games |> DF.to_map() |> Keyword.new()
# Vl.new(height: 600, width: 600)
# |> Vl.data_from_series(mtl_dat)
# |> Vl.mark(:point, filled: true, tooltip: %{content: "data"})
# |> Vl.encode_field(:x, "away_goals", type: :quantitative)
# |> Vl.encode_field(:y, "home_goals", type: :quantitative)
# |> Vl.encode(:size, aggregate: :count)
# |> Vl.encode(:color, field: "home_team", type: :nominal)
# Goals columns plus the home team for every "BOS" game.
bos_score_columns = DF.select(bos_games, ["away_goals", "home_goals", "home_team"])
bos_dat = bos_score_columns |> DF.to_map() |> Keyword.new()

# Away goals vs home goals; point size counts games, colour is the home team.
[height: 600, width: 600]
|> Vl.new()
|> Vl.data_from_series(bos_dat)
|> Vl.mark(:point, filled: true, tooltip: %{content: "data"})
|> Vl.encode_field(:x, "away_goals", type: :quantitative)
|> Vl.encode_field(:y, "home_goals", type: :quantitative)
|> Vl.encode(:size, aggregate: :count)
|> Vl.encode_field(:color, "home_team", type: :nominal)

Based on :

# {:ok, game_teams_stats} = DF.read_csv("~/Data/NHL/game_teams_stats.csv")
# home_game_teams_stats = game_teams_stats
# |> NHL.GameTeam.filter_home_games()
# # |> DF.rename(team_id: "home_team_id")
# away_game_teams_stats = game_teams_stats
# |> NHL.GameTeam.filter_away_games()
# # |> DF.rename(team_id: "away_team_id")
# all_game_teams_stats = DF.concat_rows([away_game_teams_stats, home_game_teams_stats])
# all_vl = all_game_teams_stats |> DF.to_map() |> Keyword.new()
# home = home_game_teams_stats
# |> DF.rename(team_id: "home_team_id")
# |> DF.join(games, how: :inner)
# away = away_game_teams_stats
# |> DF.rename(team_id: "away_team_id")
# |> DF.join(games, how: :inner)
defmodule NHL.GameTeam do
  @moduledoc """
  Builds the team-level game statistics data frame, joined with the game-level
  columns produced by `NHL.Games`.
  """

  # Declared locally so the module does not depend on aliases from the
  # surrounding notebook cell.
  alias Explorer.DataFrame, as: DF
  alias Explorer.Series, as: S

  @game_teams_stats_data "~/Data/NHL/game_teams_stats.csv"

  @doc """
  Reads the team statistics CSV and joins it with the games data frame.

  `game_teams_stats_data` is the path of the statistics CSV. The games data
  frame always comes from `NHL.Games.create_from_raw_data/0`.

  Rows whose `HoA` column is `"home"` get `team_id` renamed to `home_team_id`
  before the inner join with the games; rows whose `HoA` is `"away"` get it
  renamed to `away_team_id`. The two joined frames are then concatenated
  row-wise with `DF.concat_rows(home, away)`.
  """
  def create_from_raw_data(game_teams_stats_data \\ @game_teams_stats_data) do
    games = NHL.Games.create_from_raw_data()
    raw_game_teams_stats = create_raw_table_from_raw_data(game_teams_stats_data)

    home =
      raw_game_teams_stats
      |> filter_home_games()
      |> DF.rename(team_id: "home_team_id")
      |> DF.join(games, how: :inner)

    away =
      raw_game_teams_stats
      |> filter_away_games()
      |> DF.rename(team_id: "away_team_id")
      |> DF.join(games, how: :inner)

    DF.concat_rows(home, away)
  end

  # Reads the statistics CSV; raises a MatchError unless the read returns {:ok, _}.
  defp create_raw_table_from_raw_data(path) do
    {:ok, game_teams_stats} = DF.read_csv(path)
    game_teams_stats
  end

  @doc """
  Returns a boolean series marking the rows whose `HoA` column equals `"home"`.
  """
  def home_mask(df) do
    df["HoA"] |> S.equal("home")
  end

  @doc """
  Returns a boolean series marking the rows whose `HoA` column equals `"away"`.
  """
  def away_mask(df) do
    df["HoA"] |> S.equal("away")
  end

  @doc """
  Keeps only the rows whose `HoA` column equals `"home"`.
  """
  def filter_home_games(df) do
    # home_mask/1 is the same predicate the filter previously spelled out inline.
    DF.filter(df, &home_mask/1)
  end

  @doc """
  Keeps only the rows whose `HoA` column equals `"away"`.
  """
  def filter_away_games(df) do
    # away_mask/1 is the same predicate the filter previously spelled out inline.
    DF.filter(df, &away_mask/1)
  end
end
# all = DF.concat_rows(home, away)
all = NHL.GameTeam.create_from_raw_data()
# Some day, this shouldn't be necessary, but it seems necessary presently to work
# around an apparent limitation with DateTimes

# all = all
#   |> DF.mutate(date_time_GMT: &S.cast(&1["date_time_GMT"], :date))

# dt = all
# |> DF.select(["date_time_GMT"])
# |> DF.to_map()
# |> Keyword.new()
# |> Keyword.get(:date_time_GMT)

# dt_as_string_series = dt
# |> Enum.map(&Date.to_string/1)
# |> S.from_list()

# all = all
# |> DF.mutate(date_time_GMT: dt_as_string_series)

# Every column except date_time_GMT, as a keyword list for VegaLite.
all_columns = DF.to_map(all)
all_kw = all_columns |> Keyword.new() |> Keyword.delete(:date_time_GMT)
# "pim" vs goals; point size counts rows, colour is the "won" column.
[height: 600, width: 600]
|> Vl.new()
|> Vl.data_from_series(all_kw)
|> Vl.mark(:circle, opacity: 0.5)
|> Vl.encode_field(:x, "pim", type: :quantitative)
|> Vl.encode_field(:y, "goals", type: :quantitative)
|> Vl.encode(:size, aggregate: :count)
|> Vl.encode_field(:color, "won", type: :nominal)

# Away goals vs home goals; point size counts rows, colour is the home team.
[height: 600, width: 600]
|> Vl.new()
|> Vl.data_from_series(all_kw)
|> Vl.mark(:circle, filled: true, tooltip: %{content: "data"})
|> Vl.encode_field(:x, "away_goals", type: :quantitative)
|> Vl.encode_field(:y, "home_goals", type: :quantitative)
|> Vl.encode(:size, aggregate: :count)
|> Vl.encode_field(:color, "home_team", type: :nominal)

# Shots vs goals; point size counts rows, colour is the "won" column.
[height: 600, width: 600]
|> Vl.new()
|> Vl.data_from_series(all_kw)
|> Vl.mark(:circle, opacity: 0.5)
|> Vl.encode_field(:x, "shots", type: :quantitative)
|> Vl.encode_field(:y, "goals", type: :quantitative)
|> Vl.encode(:size, aggregate: :count)
|> Vl.encode_field(:color, "won", type: :nominal)
# Only the three columns the heatmap needs.
shots_pim_won_columns = DF.select(all, ["shots", "pim", "won"])
shots_pim_won = shots_pim_won_columns |> DF.to_map() |> Keyword.new()

# Heatmap: mean of "won" for each (shots, pim) cell.
[height: 600, width: 600]
|> Vl.new()
|> Vl.data_from_series(shots_pim_won)
|> Vl.mark(:rect)
|> Vl.encode_field(:x, "shots", type: :ordinal)
|> Vl.encode_field(:y, "pim", type: :ordinal, sort: :descending)
# |> Vl.encode(:size, aggregate: :count)
|> Vl.encode(:color, aggregate: :mean, field: "won")

# |> Vl.config(axis: [{:grid, true}, {:tickBand, :extent}])
# all |> DF.filter(&S.equal(&1["home_team"], "BOS")) |> DF.select(["goals", "home_goals"])
# Mean home goal differential per home team, arranged by that mean (desc).
home_summary =
  all
  |> DF.group_by(["home_team"])
  |> DF.summarise(home_goal_diff: [:mean])
  |> DF.arrange(desc: :home_goal_diff_mean)

home_goal_diffs = home_summary |> DF.to_map() |> Keyword.new()

[height: 600, width: 600]
|> Vl.new()
|> Vl.data_from_series(home_goal_diffs)
|> Vl.mark(:bar, filled: true)
|> Vl.encode(:x, field: "home_team", type: :nominal)
|> Vl.encode(:y, field: "home_goal_diff_mean", type: :quantitative)
# Mean home goal differential per away team, arranged by that mean (desc).
away_summary =
  all
  |> DF.group_by(["away_team"])
  |> DF.summarise(home_goal_diff: [:mean])
  |> DF.arrange(desc: :home_goal_diff_mean)

away_goal_diffs = away_summary |> DF.to_map() |> Keyword.new()

[height: 600, width: 600]
|> Vl.new()
|> Vl.data_from_series(away_goal_diffs)
|> Vl.mark(:bar, filled: true)
|> Vl.encode(:x, field: "away_team", type: :nominal)
|> Vl.encode(:y, field: "home_goal_diff_mean", type: :quantitative)
# Re-key the per-home-team summary under a neutral :team column.
home =
  home_goal_diffs
  |> Map.new(fn
    {:home_team, ht} -> {:team, ht}
    {:home_goal_diff_mean, diff} -> {:home_goal_diff_mean, diff}
  end)
  |> DF.from_map()

# Same for the per-away-team summary. The sign is flipped so the value is the
# differential from the away team's side.
away =
  away_goal_diffs
  |> Map.new(fn
    {:away_team, at} -> {:team, at}
    {:home_goal_diff_mean, diff} -> {:away_goal_diff_mean, Enum.map(diff, &Kernel.-/1)}
  end)
  |> DF.from_map()

# One row per team carrying both means.
home_away = DF.join(home, away)
home_away_dat = Keyword.new(DF.to_map(home_away))
# Home mean vs away mean goal differential, one point per team.
Vl.new(width: 600, height: 600)
|> Vl.data_from_series(home_away_dat)
|> Vl.mark(:point, tooltip: true)
|> Vl.encode_field(:x, "home_goal_diff_mean", type: :quantitative)
|> Vl.encode_field(:y, "away_goal_diff_mean", type: :quantitative)
|> Vl.encode_field(:color, "team", type: :nominal)

# Inspect the rows of a single game.
all |> DF.filter(&S.equal(&1["game_id"], 2_016_020_045))
# vl_all = all |> DF.to_map() |> Keyword.new()

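The linear model in matrix form:

$$y = X\beta + e$$

The least-squares estimate of the coefficients (`beta_hat`, computed as `b_hat` below):

$$\hat{\beta} = (X^{\top}X)^{-1}X^{\top}y$$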

defmodule Regression do
  @moduledoc """
  Least-squares fit of a model with an intercept and two regressors.
  """

  @doc """
  Fits `y = b0 + b1 * x1 + b2 * x2` by solving the normal equations.

  `y`, `x1` and `x2` must be lists of the same length; any other input raises a
  `FunctionClauseError`. An observation with a `nil` in any of the three lists
  is dropped before fitting (see `remove_observations_with_nils/3`).

  Returns `%{b_hat: b_hat, y_hat: y_hat}`, where `b_hat` is the tensor
  `inv(X'X) X'y`, `y_hat` is `X b_hat`, and `X` has a leading column of ones.

  E.g.:

      x1 = [1, 2, 3, 4]
      x2 = [1, 2, 3, 5]
  """
  def bivariate(y, x1, x2)
      when is_list(y) and is_list(x1) and is_list(x2) and
             length(y) == length(x1) and length(x1) == length(x2) do
    # The length of `y` is checked too: Enum.zip/1 stops at the shortest list,
    # so a mismatched `y` would otherwise silently drop observations.
    %{y: y, x1: x1, x2: x2} = remove_observations_with_nils(y, x1, x2)

    # Column of ones for the intercept term.
    x0 = List.duplicate(1, length(x1))
    x = Nx.tensor([x0, x1, x2]) |> Nx.transpose()
    y = Nx.tensor(y)
    x_prime_x = Nx.dot(Nx.transpose(x), x)
    x_prime_y = Nx.dot(Nx.transpose(x), y)
    b_hat = Nx.dot(Nx.LinAlg.invert(x_prime_x), x_prime_y)
    y_hat = Nx.dot(x, b_hat)
    %{b_hat: b_hat, y_hat: y_hat}
  end

  @doc """
  Drops every observation in which `y`, `x1` or `x2` is `nil`.

  The three lists are paired position by position. Returns
  `%{y: ys, x1: x1s, x2: x2s}` with the surviving values in their original
  order.
  """
  def remove_observations_with_nils(y, x1, x2) do
    complete =
      [y, x1, x2]
      |> Enum.zip()
      |> Enum.reject(fn {y_val, x1_val, x2_val} ->
        is_nil(y_val) or is_nil(x1_val) or is_nil(x2_val)
      end)

    %{
      y: Enum.map(complete, &elem(&1, 0)),
      x1: Enum.map(complete, &elem(&1, 1)),
      x2: Enum.map(complete, &elem(&1, 2))
    }
  end
end
# Regress goals on shots and power-play opportunities.
goals = all_kw[:goals]
shots = all_kw[:shots]
power_play_opportunities = all_kw[:powerPlayOpportunities]
Regression.bivariate(goals, shots, power_play_opportunities)

# Shots vs goals, restricted in the chart to rows whose home team is "BOS".
[height: 600, width: 600]
|> Vl.new()
|> Vl.data_from_series(all_kw)
|> Vl.transform(filter: "datum.home_team == 'BOS'")
|> Vl.mark(:point, filled: true)
|> Vl.encode_field(:x, "shots", type: :quantitative)
|> Vl.encode_field(:y, "goals", type: :quantitative)
|> Vl.encode(:size, aggregate: :count)
|> Vl.encode_field(:color, "won", type: :nominal)