e-Stat 家計支出
Mix.install([
{:explorer, "~> 0.9"},
{:kino, "~> 0.14"},
{:kino_vega_lite, "~> 0.1"},
{:req, "~> 0.5"}
])
準備
alias Explorer.DataFrame
alias Explorer.Series
require Explorer.DataFrame
データ取得
出典:政府統計の総合窓口(e-Stat)
「家計調査 / 家計収支編 総世帯 品目分類(2020年改定)(金額)」(統計局統計調査部消費統計課)を加工して利用
API仕様:
app_id_input = Kino.Input.password("APP ID")
api_path = "http://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData"
data_id = "0003348234"
response_body =
"#{api_path}?appId=#{Kino.Input.read(app_id_input)}&statsDataId=#{data_id}"
|> Req.get!()
|> Map.get(:body)
rows = String.split(response_body, "\n")
value_index = Enum.find_index(rows, &(&1 == "\"VALUE\""))
expenses_df =
rows
|> Enum.slice((value_index + 1)..-1)
|> Enum.join("\n")
|> DataFrame.load_csv!()
Kino.DataTable.new(expenses_df)
expenses_df["時間軸(年次)"]
|> Series.frequencies()
|> DataFrame.sort_by(asc: values)
|> Kino.DataTable.new()
expenses_df[["cat01_code", "品目分類(2020年改定)"]]
|> DataFrame.distinct()
|> Kino.DataTable.new()
expenses_df[["cat01_code", "品目分類(2020年改定)"]]
|> DataFrame.filter(
remainder(cat01_code, 10_000_000) == 0 and
cat01_code <= 100_000_000
)
|> DataFrame.distinct()
|> Kino.DataTable.new()
expenses_df[["cat02_code", "世帯区分"]]
|> DataFrame.distinct()
|> Kino.DataTable.new()
total_df =
expenses_df
|> DataFrame.filter(
time_code == 2_022_000_000 and
cat01_code == 1_100_000
)
|> DataFrame.select(["世帯区分", "value"])
Kino.DataTable.new(total_df)
大分類毎の割合
total =
expenses_df
|> DataFrame.filter(
time_code == 2_022_000_000 and
cat02_code == 11 and
cat01_code == 1_100_000
)
|> DataFrame.pull("value")
|> Series.to_list()
|> List.first()
latest_df =
expenses_df
|> DataFrame.filter(
time_code == 2_022_000_000 and
cat02_code == 11 and
remainder(cat01_code, 10_000_000) == 0 and
cat01_code <= 100_000_000
)
|> DataFrame.select(["品目分類(2020年改定)", "value"])
|> DataFrame.rename(["品目分類", "支出金額"])
# 小数点以下1桁の%に変換
|> DataFrame.mutate(割合: cast(支出金額 / ^total * 1000, :integer) / 10)
Kino.DataTable.new(latest_df)
items = Series.to_list(latest_df["品目分類"])
expenses = Series.to_list(latest_df["支出金額"])
VegaLite.new(width: 700, title: "支出金額内訳")
|> VegaLite.data_from_values(x: items, y: expenses)
|> VegaLite.mark(:bar, tooltip: true)
|> VegaLite.encode_field(:x, "x", type: :nominal, title: "品目分類", sort: "-y")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "支出金額")
VegaLite.new(title: "支出金額内訳")
|> VegaLite.data_from_values(x: items, y: expenses)
|> VegaLite.mark(:arc, inner_radius: 50, tooltip: true)
|> VegaLite.encode_field(:color, "x", type: :nominal, title: "品目分類")
|> VegaLite.encode_field(:theta, "y", type: :quantitative, title: "支出金額")
各値の推移
plot_line = fn code_list, title, value_title ->
target_df =
expenses_df
|> DataFrame.filter(cat01_code in ^code_list and cat02_code == 11)
|> DataFrame.select(["時間軸(年次)", "value", "品目分類(2020年改定)"])
|> DataFrame.rename(["年", value_title, "品目分類"])
year = Series.to_list(target_df["年"])
expenses = Series.to_list(target_df[value_title])
category = Series.to_list(target_df["品目分類"])
VegaLite.new(width: 600, title: title)
|> VegaLite.data_from_values(x: year, y: expenses, color: category)
|> VegaLite.mark(:line, tooltip: true)
|> VegaLite.encode_field(:x, "x", type: :ordinal, title: "年")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: value_title)
|> VegaLite.encode_field(:color, "color", title: "品目分類")
end
plot_line.([1_100_000], "総支出額推移", "金額")
plot_line.([300_000, 400_000, 500_000, 700_000], "世帯人員推移", "人数")
[
{"食料〜被服", Enum.to_list(10_000_000..50_000_000//10_000_000)},
{"保健医療〜その他", Enum.to_list(60_000_000..100_000_000//10_000_000)}
]
|> Enum.map(fn {title, list} ->
{
title,
plot_line.(list, "大分類毎支出金額推移", "金額")
}
end)
|> Kino.Layout.tabs()
plot_line.([10_110_001, 10_120_000, 10_130_000], "支出額推移", "金額")
plot_line.([10_211_000, 10_310_000, 10_510_000, 10_800_000], "支出額推移", "金額")
plot_line.([50_100_000, 50_200_000, 50_700_000], "支出額推移", "金額")
plot_line.([50_210_010, 50_220_010, 50_230_010, 50_600_020], "支出額推移", "金額")
plot_line.([70_300_010, 70_300_020, 70_300_030], "支出額推移", "金額")
plot_line.([90_100_010, 90_100_050], "支出額推移", "金額")
plot_line.([90_300_010, 90_300_020, 90_300_030], "支出額推移", "金額")
特徴の強い項目の抽出
summary_df =
expenses_df
|> DataFrame.filter(
cat02_code == 11 and
cat01_code <= 100_000_000 and
unit == "円"
)
|> DataFrame.group_by(["cat01_code", "品目分類(2020年改定)"])
|> DataFrame.summarise(変動係数: standard_deviation(value) / mean(value))
|> DataFrame.rename(["品目分類", "品目分類名", "変動係数"])
Kino.DataTable.new(summary_df)
変動の少ないもの
summary_df
|> DataFrame.sort_by(asc: 変動係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
title =
row["品目分類名"]
|> String.split(" ")
|> Enum.at(1)
{
title,
plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
}
end)
|> Kino.Layout.tabs()
変動の大きいもの
summary_df
|> DataFrame.sort_by(desc: 変動係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
title =
row["品目分類名"]
|> String.split(" ")
|> Enum.at(1)
{
title,
plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
}
end)
|> Kino.Layout.tabs()
単調増加、単調減少
change_df =
expenses_df
|> DataFrame.filter(
cat02_code == 11 and
cat01_code <= 100_000_000 and
unit == "円"
)
|> DataFrame.select(["世帯区分", "cat01_code", "品目分類(2020年改定)", "時間軸(年次)", "value", "unit"])
|> DataFrame.rename(["世帯区分", "品目分類", "品目分類名", "年", "金額", "単位"])
# 年毎の金額を列に展開する
|> DataFrame.pivot_wider("年", "金額")
# 16年間存在した品目分類だけを対象にする
|> DataFrame.filter(
not contains(品目分類名, "までは") and
"2007年" |> col() |> is_not_nil()
)
|> DataFrame.mutate(経年変化率: (col("2022年") - col("2007年")) / col("2007年"))
|> DataFrame.sort_by(desc: 経年変化率)
Kino.DataTable.new(change_df)
single_change_df =
2008..2022
|> Enum.reduce(change_df, fn year, acc_df ->
this_year = "#{year}年"
before_year = "#{year - 1}年"
col_name = "単年変化率_#{this_year}" |> String.to_atom()
acc_df
|> DataFrame.mutate([
{
^col_name,
(col(^this_year) - col(^before_year)) / col(^before_year)
}
])
end)
|> DataFrame.pivot_longer(&String.starts_with?(&1, "単年変化率_"),
names_to: "変化年",
values_to: "単年変化率"
)
|> DataFrame.group_by("品目分類")
|> DataFrame.summarise(最大単年変化率: max(単年変化率))
change_df =
change_df
|> DataFrame.join(single_change_df)
|> DataFrame.mutate(単調変化係数: 経年変化率 / (最大単年変化率 |> pow(2) |> pow(0.5)))
Kino.DataTable.new(change_df)
change_df
|> DataFrame.sort_by(asc: 単調変化係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
title =
row["品目分類名"]
|> String.split(" ")
|> Enum.at(1)
{
title,
plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
}
end)
|> Kino.Layout.tabs()
change_df
|> DataFrame.sort_by(desc: 単調変化係数)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
title =
row["品目分類名"]
|> String.split(" ")
|> Enum.at(1)
{
title,
plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
}
end)
|> Kino.Layout.tabs()
コロナによる影響
covid_df =
expenses_df
|> DataFrame.filter(
cat02_code == 11 and
cat01_code <= 100_000_000 and
unit == "円" and
time_code >= 2_019_000_000
)
|> DataFrame.select(["cat01_code", "品目分類(2020年改定)", "時間軸(年次)", "value"])
|> DataFrame.rename(["品目分類", "品目分類名", "年", "金額"])
|> DataFrame.filter(not contains(品目分類名, "までは"))
|> DataFrame.pivot_wider("年", "金額")
|> DataFrame.mutate(
コロナ発生時変化率: (col("2020年") - col("2019年")) / col("2019年"),
コロナ発生後変化率: (col("2022年") - col("2020年")) / col("2019年")
)
Kino.DataTable.new(covid_df)
コロナによって悪影響を受けたもの
covid_df
|> DataFrame.sort_by(asc: コロナ発生時変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
title =
row["品目分類名"]
|> String.split(" ")
|> Enum.at(1)
{
title,
plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
}
end)
|> Kino.Layout.tabs()
コロナによって好影響を受けたもの
covid_df
|> DataFrame.sort_by(desc: コロナ発生時変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
title =
row["品目分類名"]
|> String.split(" ")
|> Enum.at(1)
{
title,
plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
}
end)
|> Kino.Layout.tabs()
コロナ後に回復したもの
covid_df
|> DataFrame.filter(コロナ発生時変化率 < -0.5 and コロナ発生後変化率 > 0.3)
|> DataFrame.sort_by(desc: コロナ発生後変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
title =
row["品目分類名"]
|> String.split(" ")
|> Enum.at(1)
{
title,
plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
}
end)
|> Kino.Layout.tabs()
コロナ後に回復していないもの
covid_df
|> DataFrame.filter(コロナ発生時変化率 < -0.5 and コロナ発生後変化率 < 0.2)
|> DataFrame.sort_by(asc: コロナ発生後変化率)
|> DataFrame.slice(0..4)
|> DataFrame.to_rows()
|> Enum.map(fn row ->
title =
row["品目分類名"]
|> String.split(" ")
|> Enum.at(1)
{
title,
plot_line.([row["品目分類"]], "#{title}支出額推移", "金額")
}
end)
|> Kino.Layout.tabs()