Notesclub

created by hec & contributors

terms privacy

家計調査データ取得

household_expense_statistics.livemd

Ryo Wakabayashi

@RyoWakabayashi

elixir-learning

Share to X

Share to Bluesky

More notebooks

家計調査データ取得

Mix.install(
  [
    {:explorer, "~> 0.9"},
    {:codepagex, "~> 0.1"},
    {:kino, "~> 0.8"},
    {:kino_vega_lite, "~> 0.1"},
    {:req, "~> 0.5"}
  ],
  config: [
    codepagex: [
      encodings: ["VENDORS/MICSFT/WINDOWS/CP932"]
    ]
  ]
)

出典

総務省統計局ホームページ

家計調査（家計収支編）　時系列データ（二人以上の世帯）

月全品目（2015年改定）
月全品目（2020年改定）

（2023年2月20日に利用）

準備

alias Explorer.DataFrame
alias Explorer.Series
require Explorer.DataFrame

shift_jis = "VENDORS/MICSFT/WINDOWS/CP932"

データ取得

string_value =
  "https://www.stat.go.jp/data/kakei/longtime/csv/h-mon-2020.csv"
  |> Req.get!()
  |> then(&amp;Codepagex.to_string!(&amp;1.body, shift_jis))

整形

[header_row | data_rows] =
  string_value
  |> String.split("\r\n")
  |> Enum.slice(3..-1)
  |> Enum.map(&amp;String.split(&amp;1, ","))

fixed_cols = Enum.slice(header_row, 0..7)

month_cols =
  header_row
  |> Enum.slice(8..-1)
  |> Enum.map(fn month ->
    month
    |> String.trim()
    |> String.replace("月", "")
    |> then(&amp;String.slice("0" <> &amp;1, -2..-1))
  end)
  |> Enum.chunk_every(12)
  |> Enum.zip([2020, 2021, 2022])
  |> Enum.flat_map(fn {month_list, year} ->
    Enum.map(month_list, fn month -> "#{year}-#{month}-01" end)
  end)
  |> dbg()

header_row = fixed_cols ++ month_cols

df_2020 =
  [header_row | data_rows]
  |> Enum.map(&amp;Enum.join(&amp;1, ","))
  |> Enum.join("\n")
  |> DataFrame.load_csv!()

Kino.DataTable.new(df_2020)

df_2015 =
  "https://www.stat.go.jp/data/kakei/longtime/csv/h-mon-2015.csv"
  |> Req.get!()
  |> then(&amp;Codepagex.to_string!(&amp;1.body, shift_jis))
  |> String.split("\r\n")
  |> Enum.slice(3..-1)
  |> Enum.map(&amp;String.split(&amp;1, ","))
  |> then(fn [header_row | data_rows] ->
    fixed_cols = Enum.slice(header_row, 0..7)

    month_cols =
      header_row
      |> Enum.slice(8..-1)
      |> Enum.map(fn month ->
        month
        |> String.trim()
        |> String.replace("月", "")
        |> then(&amp;String.slice("0" <> &amp;1, -2..-1))
      end)
      |> Enum.chunk_every(12)
      |> Enum.zip(2015..2019)
      |> Enum.flat_map(fn {month_list, year} ->
        Enum.map(month_list, fn month -> "#{year}-#{month}-01" end)
      end)

    header_row = fixed_cols ++ month_cols

    [header_row | data_rows]
    |> Enum.map(&amp;Enum.join(&amp;1, ","))
    |> Enum.join("\n")
  end)
  |> DataFrame.load_csv!()

Kino.DataTable.new(df_2015)

df_all =
  DataFrame.join(
    DataFrame.select(df_2015, &amp;(&amp;1 != "表側連番")),
    DataFrame.select(df_2020, &amp;(&amp;1 != "表側連番")),
    how: :outer
  )

Kino.DataTable.new(df_all)

pivot_df =
  DataFrame.pivot_longer(
    df_all,
    &amp;String.starts_with?(&amp;1, "20"),
    names_to: "年月",
    values_to: "支出金額"
  )

Kino.DataTable.new(pivot_df)

date_list = Series.to_list(pivot_df["年月"])

month_series =
  date_list
  |> Enum.map(&amp;(&amp;1 |> String.split("-") |> Enum.at(1) |> String.to_integer()))
  |> Series.from_list()

year_series =
  date_list
  |> Enum.map(&amp;(&amp;1 |> String.split("-") |> Enum.at(0) |> String.to_integer()))
  |> Series.from_list()

household_df =
  pivot_df
  |> DataFrame.put("月", month_series)
  |> DataFrame.put("年", year_series)

Kino.DataTable.new(household_df)

ローカルへのダウンロード

Kino.Download.new(
  fn -> DataFrame.dump_csv!(household_df) end,
  filename: "家計支出統計_品目年月別.csv"
)

集約項目一覧

household_df
|> DataFrame.filter(大分類 == "-")
|> DataFrame.distinct(["品目分類"])
|> Kino.DataTable.new()

大分類一覧

household_df
|> DataFrame.filter(
  大分類 != "-" and
    中分類 == "-" and
    小分類 == "-" and
    中間計 == "-" and
    符号 == "-"
)
|> DataFrame.distinct(["大分類", "品目分類"])
|> Kino.DataTable.new()

中分類一覧

household_df
|> DataFrame.filter(
  大分類 != "-" and
    中分類 != "-" and
    小分類 == "-" and
    中間計 == "-" and
    符号 == "-"
)
|> DataFrame.distinct(["大分類", "中分類", "品目分類"])
|> Kino.DataTable.new()

小分類一覧

household_df
|> DataFrame.filter(
  大分類 != "-" and
    中分類 != "-" and
    小分類 != "-" and
    中間計 == "-" and
    符号 == "-"
)
|> DataFrame.distinct(["大分類", "中分類", "小分類", "品目分類"])
|> Kino.DataTable.new()

中間計一覧

household_df
|> DataFrame.filter(
  大分類 != "-" and
    中分類 != "-" and
    小分類 != "-" and
    中間計 != "-" and
    符号 == "-"
)
|> DataFrame.distinct(["中間計", "品目分類"])
|> Kino.DataTable.new()

符号一覧

household_df
|> DataFrame.filter(符号 != "-")
|> DataFrame.distinct(["符号", "品目分類"])
|> Kino.DataTable.new()

ケーキの支出推移

cake_df = DataFrame.filter(household_df, 符号 == "344")

Kino.DataTable.new(cake_df)

month = Series.to_list(cake_df["年月"])
expenses = Series.to_list(cake_df["支出金額"])

VegaLite.new(width: 700, title: "ケーキ支出金額推移")
|> VegaLite.data_from_values(x: month, y: expenses)
|> VegaLite.mark(:line, tooltip: true)
|> VegaLite.encode_field(:x, "x", type: :temporal, title: "年月")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "支出金額")

latest_cake_df = DataFrame.filter(cake_df, 年 == 2022)

month = Series.to_list(latest_cake_df["月"])
expenses = Series.to_list(latest_cake_df["支出金額"])

VegaLite.new(width: 700, title: "ケーキ支出金額推移")
|> VegaLite.data_from_values(x: month, y: expenses)
|> VegaLite.mark(:line, tooltip: true)
|> VegaLite.encode_field(:x, "x", type: :ordinal, title: "年月")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "支出金額")

latest_cake_df
|> DataFrame.group_by("品目分類")
|> DataFrame.mutate(
  最小: min(支出金額),
  最大: max(支出金額),
  最小最大比: max(支出金額) / min(支出金額)
)
|> DataFrame.select(["最小", "最大", "最小最大比"])
|> DataFrame.to_rows()
|> List.first()

Other notebooks:

@andyl

elix_util

MNIST

mnist.livemd

tutorial advanced data-science req axon exla nx

2022-8-18
Yejun Su
@goofansu

ogp

ogp

ogp.livemd

tutorial intermediate ogp kino

2022-8-18
@TomBers

livebookNotes

Attractors

attractors.livemd

advanced data-science decimal vega_lite kino

2022-8-18
Wojtek Mach
@wojtekmach

notebooks

Playground

rss.livemd

tutorial intermediate req easyxml

2022-8-18
Jonatan Kłosko
@jonatanklosko

meow

Rastrigin

rastrigin_intro.livemd

advanced data-science meow nx exla

2022-8-18
The Lazy Programmer
@thelazyprog

livebooks

Kickstarter budget

kickstarter_budget.livemd

tutorial advanced ex_money decimal kino date_time_parser number

2023-2-12
Ryo Wakabayashi
@RyoWakabayashi

elixir-learning

AWS SageMaker Serverless

sagemaker_serverless.livemd

tutorial advanced data-science apis aws ex_aws ex_aws_sts ex_aws_s3 req nx stb_image hackney sweet_xml kino flow

2024-9-19

Back