Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

e-Stat 家計支出

e_stat_household_expense.livemd

e-Stat 家計支出

Mix.install([
  {:explorer, "~> 0.9"},
  {:kino, "~> 0.14"},
  {:kino_vega_lite, "~> 0.1"},
  {:req, "~> 0.5"}
])

準備

alias Explorer.DataFrame
alias Explorer.Series
require Explorer.DataFrame

データ取得

出典:政府統計の総合窓口(e-Stat)

「家計調査 / 家計収支編 総世帯 品目分類(2020年改定)(金額)」(統計局統計調査部消費統計課)を加工して利用

API仕様:

app_id_input = Kino.Input.password("APP ID")
api_path = "http://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData"
data_id = "0003348234"
response_body =
  "#{api_path}?appId=#{Kino.Input.read(app_id_input)}&statsDataId=#{data_id}"
  |> Req.get!()
  |> Map.get(:body)
rows = String.split(response_body, "\n")
value_index = Enum.find_index(rows, &(&1 == "\"VALUE\""))
expenses_df =
  rows
  |> Enum.slice((value_index + 1)..-1)
  |> Enum.join("\n")
  |> DataFrame.load_csv!()

Kino.DataTable.new(expenses_df)
expenses_df["時間軸(年次)"]
|> Series.frequencies()
|> DataFrame.sort_by(asc: values)
|> Kino.DataTable.new()
expenses_df[["cat01_code", "品目分類(2020年改定)"]]
|> DataFrame.distinct()
|> Kino.DataTable.new()
expenses_df[["cat01_code", "品目分類(2020年改定)"]]
|> DataFrame.filter(
  remainder(cat01_code, 10_000_000) == 0 and
    cat01_code <= 100_000_000
)
|> DataFrame.distinct()
|> Kino.DataTable.new()
expenses_df[["cat02_code", "世帯区分"]]
|> DataFrame.distinct()
|> Kino.DataTable.new()
total_df =
  expenses_df
  |> DataFrame.filter(
    time_code == 2_022_000_000 and
      cat01_code == 1_100_000
  )
  |> DataFrame.select(["世帯区分", "value"])

Kino.DataTable.new(total_df)

大分類毎の割合

total =
  expenses_df
  |> DataFrame.filter(
    time_code == 2_022_000_000 and
      cat02_code == 11 and
      cat01_code == 1_100_000
  )
  |> DataFrame.pull("value")
  |> Series.to_list()
  |> List.first()
latest_df =
  expenses_df
  |> DataFrame.filter(
    time_code == 2_022_000_000 and
      cat02_code == 11 and
      remainder(cat01_code, 10_000_000) == 0 and
      cat01_code <= 100_000_000
  )
  |> DataFrame.select(["品目分類(2020年改定)", "value"])
  |> DataFrame.rename(["品目分類", "支出金額"])
  # 小数点以下1桁の%に変換
  |> DataFrame.mutate(割合: cast(支出金額 / ^total * 1000, :integer) / 10)

Kino.DataTable.new(latest_df)
items = Series.to_list(latest_df["品目分類"])
expenses = Series.to_list(latest_df["支出金額"])

VegaLite.new(width: 700, title: "支出金額内訳")
|> VegaLite.data_from_values(x: items, y: expenses)
|> VegaLite.mark(:bar, tooltip: true)
|> VegaLite.encode_field(:x, "x", type: :nominal, title: "品目分類", sort: "-y")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "支出金額")
VegaLite.new(title: "支出金額内訳")
|> VegaLite.data_from_values(x: items, y: expenses)
|> VegaLite.mark(:arc, inner_radius: 50, tooltip: true)
|> VegaLite.encode_field(:color, "x", type: :nominal, title: "品目分類")
|> VegaLite.encode_field(:theta, "y", type: :quantitative, title: "支出金額")

各値の推移

plot_line = fn code_list, title, value_title ->
  target_df =
    expenses_df
    |> DataFrame.filter(cat01_code in ^code_list and cat02_code == 11)
    |> DataFrame.select(["時間軸(年次)", "value", "品目分類(2020年改定)"])
    |> DataFrame.rename(["年", value_title, "品目分類"])

  year = Series.to_list(target_df["年"])
  expenses = Series.to_list(target_df[value_title])
  category = Series.to_list(target_df["品目分類"])

  VegaLite.new(width: 600, title: title)
  |> VegaLite.data_from_values(x: year, y: expenses, color: category)
  |> VegaLite.mark(:line, tooltip: true)
  |> VegaLite.encode_field(:x, "x", type: :ordinal, title: "年")
  |> VegaLite.encode_field(:y, "y", type: :quantitative, title: value_title)
  |> VegaLite.encode_field(:color, "color", title: "品目分類")
end
plot_line.([1_100_000], "総支出額推移", "金額")
plot_line.([300_000, 400_000, 500_000, 700_000], "世帯人員推移", "人数")
[
  {"食料〜被服", Enum.to_list(10_000_000..50_000_000//10_000_000)},
  {"保健医療〜その他", Enum.to_list(60_000_000..100_000_000//10_000_000)}
]
|> Enum.map(fn {title, list} ->
  {
    title,
    plot_line.(list, "大分類毎支出金額推移", "金額")
  }
end)
|> Kino.Layout.tabs()
plot_line.([10_110_001, 10_120_000, 10_130_000], "支出額推移", "金額")
plot_line.([10_211_000, 10_310_000, 10_510_000, 10_800_000], "支出額推移", "金額")
plot_line.([50_100_000, 50_200_000, 50_700_000], "支出額推移", "金額")
plot_line.([50_210_010, 50_220_010, 50_230_010, 50_600_020], "支出額推移", "金額")
plot_line.([70_300_010, 70_300_020, 70_300_030], "支出額推移", "金額")
plot_line.([90_100_010, 90_100_050], "支出額推移", "金額")
plot_line.([90_300_010, 90_300_020, 90_300_030], "支出額推移", "金額")

特徴の強い項目の抽出

summary_df =
  expenses_df
  |> DataFrame.filter(
    cat02_code == 11 and
      cat01_code <= 100_000_000 and
      unit == "円"
  )
  |> DataFrame.group_by(["cat01_code", "品目分類(2020年改定)"])
  |> DataFrame.summarise(変動係数: standard_deviation(value) / mean(value))
  |> DataFrame.rename(["品目分類", "品目分類名", "変動係数"])

Kino.DataTable.new(summary_df)

変動の少ないもの

summary_df
|> DataFrame.sort_by(asc: 変動係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

変動の大きいもの

summary_df
|> DataFrame.sort_by(desc: 変動係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

単調増加、単調減少

change_df =
  expenses_df
  |> DataFrame.filter(
    cat02_code == 11 and
      cat01_code <= 100_000_000 and
      unit == "円"
  )
  |> DataFrame.select(["世帯区分", "cat01_code", "品目分類(2020年改定)", "時間軸(年次)", "value", "unit"])
  |> DataFrame.rename(["世帯区分", "品目分類", "品目分類名", "年", "金額", "単位"])
  # 年毎の金額を列に展開する
  |> DataFrame.pivot_wider("年", "金額")
  # 16年間存在した品目分類だけを対象にする
  |> DataFrame.filter(
    not contains(品目分類名, "までは") and
      "2007年" |> col() |> is_not_nil()
  )
  |> DataFrame.mutate(経年変化率: (col("2022年") - col("2007年")) / col("2007年"))
  |> DataFrame.sort_by(desc: 経年変化率)

Kino.DataTable.new(change_df)
single_change_df =
  2008..2022
  |> Enum.reduce(change_df, fn year, acc_df ->
    this_year = "#{year}年"
    before_year = "#{year - 1}年"
    col_name = "単年変化率_#{this_year}" |> String.to_atom()

    acc_df
    |> DataFrame.mutate([
      {
        ^col_name,
        (col(^this_year) - col(^before_year)) / col(^before_year)
      }
    ])
  end)
  |> DataFrame.pivot_longer(&amp;String.starts_with?(&amp;1, "単年変化率_"),
    names_to: "変化年",
    values_to: "単年変化率"
  )
  |> DataFrame.group_by("品目分類")
  |> DataFrame.summarise(最大単年変化率: max(単年変化率))

change_df =
  change_df
  |> DataFrame.join(single_change_df)
  |> DataFrame.mutate(単調変化係数: 経年変化率 / (最大単年変化率 |> pow(2) |> pow(0.5)))

Kino.DataTable.new(change_df)
change_df
|> DataFrame.sort_by(asc: 単調変化係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()
change_df
|> DataFrame.sort_by(desc: 単調変化係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

コロナによる影響

covid_df =
  expenses_df
  |> DataFrame.filter(
    cat02_code == 11 and
      cat01_code <= 100_000_000 and
      unit == "円" and
      time_code >= 2_019_000_000
  )
  |> DataFrame.select(["cat01_code", "品目分類(2020年改定)", "時間軸(年次)", "value"])
  |> DataFrame.rename(["品目分類", "品目分類名", "年", "金額"])
  |> DataFrame.filter(not contains(品目分類名, "までは"))
  |> DataFrame.pivot_wider("年", "金額")
  |> DataFrame.mutate(
    コロナ発生時変化率: (col("2020年") - col("2019年")) / col("2019年"),
    コロナ発生後変化率: (col("2022年") - col("2020年")) / col("2019年")
  )

Kino.DataTable.new(covid_df)

コロナによって悪影響を受けたもの

covid_df
|> DataFrame.sort_by(asc: コロナ発生時変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

コロナによって好影響を受けたもの

covid_df
|> DataFrame.sort_by(desc: コロナ発生時変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

コロナ後に回復したもの

covid_df
|> DataFrame.filter(コロナ発生時変化率 < -0.5 and コロナ発生後変化率 > 0.3)
|> DataFrame.sort_by(desc: コロナ発生後変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

コロナ後に回復していないもの

covid_df
|> DataFrame.filter(コロナ発生時変化率 < -0.5 and コロナ発生後変化率 < 0.2)
|> DataFrame.sort_by(asc: コロナ発生後変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()