Notesclub

created by hec & contributors

terms privacy

e-Stat 家計支出

e_stat_household_expense.livemd

Ryo Wakabayashi

@RyoWakabayashi

elixir-learning

Share to X

Share to Bluesky

More notebooks

e-Stat 家計支出

Mix.install([
  {:explorer, "~> 0.9"},
  {:kino, "~> 0.15"},
  {:kino_vega_lite, "~> 0.1"},
  {:req, "~> 0.5"}
])

準備

alias Explorer.DataFrame
alias Explorer.Series
require Explorer.DataFrame

データ取得

出典：政府統計の総合窓口(e-Stat)

「家計調査 / 家計収支編総世帯品目分類（2020年改定）（金額）」（統計局統計調査部消費統計課）を加工して利用

API仕様:

app_id_input = Kino.Input.password("APP ID")

api_path = "http://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData"

data_id = "0003348234"

response_body =
  "#{api_path}?appId=#{Kino.Input.read(app_id_input)}&statsDataId=#{data_id}"
  |> Req.get!()
  |> Map.get(:body)

rows = String.split(response_body, "\n")

value_index = Enum.find_index(rows, &amp;(&amp;1 == "\"VALUE\""))

expenses_df =
  rows
  |> Enum.slice((value_index + 1)..-1)
  |> Enum.join("\n")
  |> DataFrame.load_csv!()

Kino.DataTable.new(expenses_df)

expenses_df["時間軸（年次）"]
|> Series.frequencies()
|> DataFrame.sort_by(asc: values)
|> Kino.DataTable.new()

expenses_df[["cat01_code", "品目分類（2020年改定）"]]
|> DataFrame.distinct()
|> Kino.DataTable.new()

expenses_df[["cat01_code", "品目分類（2020年改定）"]]
|> DataFrame.filter(
  remainder(cat01_code, 10_000_000) == 0 and
    cat01_code <= 100_000_000
)
|> DataFrame.distinct()
|> Kino.DataTable.new()

expenses_df[["cat02_code", "世帯区分"]]
|> DataFrame.distinct()
|> Kino.DataTable.new()

total_df =
  expenses_df
  |> DataFrame.filter(
    time_code == 2_022_000_000 and
      cat01_code == 1_100_000
  )
  |> DataFrame.select(["世帯区分", "value"])

Kino.DataTable.new(total_df)

大分類毎の割合

total =
  expenses_df
  |> DataFrame.filter(
    time_code == 2_022_000_000 and
      cat02_code == 11 and
      cat01_code == 1_100_000
  )
  |> DataFrame.pull("value")
  |> Series.to_list()
  |> List.first()

latest_df =
  expenses_df
  |> DataFrame.filter(
    time_code == 2_022_000_000 and
      cat02_code == 11 and
      remainder(cat01_code, 10_000_000) == 0 and
      cat01_code <= 100_000_000
  )
  |> DataFrame.select(["品目分類（2020年改定）", "value"])
  |> DataFrame.rename(["品目分類", "支出金額"])
  # 小数点以下1桁の％に変換
  |> DataFrame.mutate(割合: cast(支出金額 / ^total * 1000, :integer) / 10)

Kino.DataTable.new(latest_df)

items = Series.to_list(latest_df["品目分類"])
expenses = Series.to_list(latest_df["支出金額"])

VegaLite.new(width: 700, title: "支出金額内訳")
|> VegaLite.data_from_values(x: items, y: expenses)
|> VegaLite.mark(:bar, tooltip: true)
|> VegaLite.encode_field(:x, "x", type: :nominal, title: "品目分類", sort: "-y")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "支出金額")

VegaLite.new(title: "支出金額内訳")
|> VegaLite.data_from_values(x: items, y: expenses)
|> VegaLite.mark(:arc, inner_radius: 50, tooltip: true)
|> VegaLite.encode_field(:color, "x", type: :nominal, title: "品目分類")
|> VegaLite.encode_field(:theta, "y", type: :quantitative, title: "支出金額")

各値の推移

plot_line = fn code_list, title, value_title ->
  target_df =
    expenses_df
    |> DataFrame.filter(cat01_code in ^code_list and cat02_code == 11)
    |> DataFrame.select(["時間軸（年次）", "value", "品目分類（2020年改定）"])
    |> DataFrame.rename(["年", value_title, "品目分類"])

  year = Series.to_list(target_df["年"])
  expenses = Series.to_list(target_df[value_title])
  category = Series.to_list(target_df["品目分類"])

  VegaLite.new(width: 600, title: title)
  |> VegaLite.data_from_values(x: year, y: expenses, color: category)
  |> VegaLite.mark(:line, tooltip: true)
  |> VegaLite.encode_field(:x, "x", type: :ordinal, title: "年")
  |> VegaLite.encode_field(:y, "y", type: :quantitative, title: value_title)
  |> VegaLite.encode_field(:color, "color", title: "品目分類")
end

plot_line.([1_100_000], "総支出額推移", "金額")

plot_line.([300_000, 400_000, 500_000, 700_000], "世帯人員推移", "人数")

[
  {"食料〜被服", Enum.to_list(10_000_000..50_000_000//10_000_000)},
  {"保健医療〜その他", Enum.to_list(60_000_000..100_000_000//10_000_000)}
]
|> Enum.map(fn {title, list} ->
  {
    title,
    plot_line.(list, "大分類毎支出金額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

plot_line.([10_110_001, 10_120_000, 10_130_000], "支出額推移", "金額")

plot_line.([10_211_000, 10_310_000, 10_510_000, 10_800_000], "支出額推移", "金額")

plot_line.([50_100_000, 50_200_000, 50_700_000], "支出額推移", "金額")

plot_line.([50_210_010, 50_220_010, 50_230_010, 50_600_020], "支出額推移", "金額")

plot_line.([70_300_010, 70_300_020, 70_300_030], "支出額推移", "金額")

plot_line.([90_100_010, 90_100_050], "支出額推移", "金額")

plot_line.([90_300_010, 90_300_020, 90_300_030], "支出額推移", "金額")

特徴の強い項目の抽出

summary_df =
  expenses_df
  |> DataFrame.filter(
    cat02_code == 11 and
      cat01_code <= 100_000_000 and
      unit == "円"
  )
  |> DataFrame.group_by(["cat01_code", "品目分類（2020年改定）"])
  |> DataFrame.summarise(変動係数: standard_deviation(value) / mean(value))
  |> DataFrame.rename(["品目分類", "品目分類名", "変動係数"])

Kino.DataTable.new(summary_df)

変動の少ないもの

summary_df
|> DataFrame.sort_by(asc: 変動係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

変動の大きいもの

summary_df
|> DataFrame.sort_by(desc: 変動係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

単調増加、単調減少

change_df =
  expenses_df
  |> DataFrame.filter(
    cat02_code == 11 and
      cat01_code <= 100_000_000 and
      unit == "円"
  )
  |> DataFrame.select(["世帯区分", "cat01_code", "品目分類（2020年改定）", "時間軸（年次）", "value", "unit"])
  |> DataFrame.rename(["世帯区分", "品目分類", "品目分類名", "年", "金額", "単位"])
  # 年毎の金額を列に展開する
  |> DataFrame.pivot_wider("年", "金額")
  # 16年間存在した品目分類だけを対象にする
  |> DataFrame.filter(
    not contains(品目分類名, "までは") and
      "2007年" |> col() |> is_not_nil()
  )
  |> DataFrame.mutate(経年変化率: (col("2022年") - col("2007年")) / col("2007年"))
  |> DataFrame.sort_by(desc: 経年変化率)

Kino.DataTable.new(change_df)

single_change_df =
  2008..2022
  |> Enum.reduce(change_df, fn year, acc_df ->
    this_year = "#{year}年"
    before_year = "#{year - 1}年"
    col_name = "単年変化率_#{this_year}" |> String.to_atom()

    acc_df
    |> DataFrame.mutate([
      {
        ^col_name,
        (col(^this_year) - col(^before_year)) / col(^before_year)
      }
    ])
  end)
  |> DataFrame.pivot_longer(&amp;String.starts_with?(&amp;1, "単年変化率_"),
    names_to: "変化年",
    values_to: "単年変化率"
  )
  |> DataFrame.group_by("品目分類")
  |> DataFrame.summarise(最大単年変化率: max(単年変化率))

change_df =
  change_df
  |> DataFrame.join(single_change_df)
  |> DataFrame.mutate(単調変化係数: 経年変化率 / (最大単年変化率 |> pow(2) |> pow(0.5)))

Kino.DataTable.new(change_df)

change_df
|> DataFrame.sort_by(asc: 単調変化係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

change_df
|> DataFrame.sort_by(desc: 単調変化係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

コロナによる影響

covid_df =
  expenses_df
  |> DataFrame.filter(
    cat02_code == 11 and
      cat01_code <= 100_000_000 and
      unit == "円" and
      time_code >= 2_019_000_000
  )
  |> DataFrame.select(["cat01_code", "品目分類（2020年改定）", "時間軸（年次）", "value"])
  |> DataFrame.rename(["品目分類", "品目分類名", "年", "金額"])
  |> DataFrame.filter(not contains(品目分類名, "までは"))
  |> DataFrame.pivot_wider("年", "金額")
  |> DataFrame.mutate(
    コロナ発生時変化率: (col("2020年") - col("2019年")) / col("2019年"),
    コロナ発生後変化率: (col("2022年") - col("2020年")) / col("2019年")
  )

Kino.DataTable.new(covid_df)

コロナによって悪影響を受けたもの

covid_df
|> DataFrame.sort_by(asc: コロナ発生時変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

コロナによって好影響を受けたもの

covid_df
|> DataFrame.sort_by(desc: コロナ発生時変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

コロナ後に回復したもの

covid_df
|> DataFrame.filter(コロナ発生時変化率 < -0.5 and コロナ発生後変化率 > 0.3)
|> DataFrame.sort_by(desc: コロナ発生後変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

コロナ後に回復していないもの

covid_df
|> DataFrame.filter(コロナ発生時変化率 < -0.5 and コロナ発生後変化率 < 0.2)
|> DataFrame.sort_by(asc: コロナ発生後変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
  title =
    row["品目分類名"]
    |> String.split(" ")
    |> Enum.at(1)

  {
    title,
    plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
  }
end)
|> Kino.Layout.tabs()

Other notebooks:

@andyl

elix_util

MNIST

mnist.livemd

tutorial advanced data-science req axon exla nx

2022-8-18
Yejun Su
@goofansu

ogp

ogp

ogp.livemd

tutorial intermediate ogp kino

2022-8-18
@TomBers

livebookNotes

Attractors

attractors.livemd

advanced data-science decimal vega_lite kino

2022-8-18
Wojtek Mach
@wojtekmach

notebooks

Playground

rss.livemd

tutorial intermediate req easyxml

2022-8-18
@ohisama

ohi_Livebook

req

req.livemd

tutorial advanced apis req kino kino_explorer

2024-1-9
@DockYard-Academy

curriculum

Snowman Script

deprecated_snowman_script.livemd

tutorial intermediate jason kino youtube hidden_cell

2023-3-21
Hugo Baraúna
@hugobarauna

livebook-notebooks

What's new in Livebook 0.10

whats_new_in_livebook_v0.10.livemd

tutorial advanced data-science intermediate kino kino_explorer req

2023-7-8

Back