Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

RESAS から産業構造を取得する

livebooks/resas/industry.livemd

RESAS から産業構造を取得する

Mix.install([
  {:req, "~> 0.5"},
  {:explorer, "~> 0.9"},
  {:nx, "~> 0.9"},
  {:kino, "~> 0.14"},
  {:kino_vega_lite, "~> 0.1"}
])

情報の設定

このノートブックはRESAS(地域経済分析システム)のデータを加工して作成しています

# Password-style input used to enter the RESAS API key
api_key_input = Kino.Input.password("API_KEY")

# Root URL that every RESAS endpoint below is appended to
base_url = "https://opendata.resas-portal.go.jp"

alias Explorer.{DataFrame, Series}
# The DataFrame macros (filter/mutate) used later require the module to be required
require Explorer.DataFrame

RESAS の認証

# Read the key typed into the input and wrap it as the X-API-KEY request header
api_key = Kino.Input.read(api_key_input)
auth_header = {"X-API-KEY", api_key}

# Render nothing so the header tuple (and the key inside it) is not displayed
Kino.nothing()

都道府県一覧の取得

# Request the prefecture list and load the "result" payload into a data frame
prefectures_url = "#{base_url}/api/v1/prefectures"
prefectures_response = Req.get!(prefectures_url, headers: [auth_header])
prefectures_df = DataFrame.new(prefectures_response.body["result"])

Kino.DataTable.new(prefectures_df, sorting_enabled: true)

# Prefecture code of the row whose prefName is "大分県"; used by the queries below
oita_rows =
  DataFrame.filter_with(prefectures_df, fn ldf ->
    Series.equal(ldf["prefName"], "大分県")
  end)

oita_pref_code =
  oita_rows
  |> DataFrame.pull("prefCode")
  |> Series.first()

産業一覧の取得

# Broad classification: load the "result" list of the endpoint into a data frame
industries_broad_response =
  Req.get!("#{base_url}/api/v1/industries/broad", headers: [auth_header])

industries_broad_df = DataFrame.new(industries_broad_response.body["result"])

Kino.DataTable.new(industries_broad_df)
# Middle classification
# Requests the middle classifications for one broad classification code (`sic_code`)
# and returns the "result" payload as a data frame.
get_industries_middle = fn sic_code ->
  response =
    Req.get!(
      "#{base_url}/api/v1/industries/middle?sicCode=#{sic_code}",
      headers: [auth_header]
    )

  DataFrame.new(response.body["result"])
end

broad_sic_codes =
  industries_broad_df
  |> DataFrame.pull("sicCode")
  |> Series.to_list()

# One request per broad code; empty results are dropped before concatenating
industries_middle_df =
  broad_sic_codes
  |> Enum.map(get_industries_middle)
  |> Enum.reject(fn middle_df -> DataFrame.n_rows(middle_df) == 0 end)
  |> DataFrame.concat_rows()

Kino.DataTable.new(industries_middle_df)
# Narrow classification
# Requests the narrow classifications for one middle classification code (`simc_code`)
# and returns the "result" payload as a data frame.
get_industries_narrow = fn simc_code ->
  response =
    Req.get!(
      "#{base_url}/api/v1/industries/narrow?simcCode=#{simc_code}",
      headers: [auth_header]
    )

  DataFrame.new(response.body["result"])
end

middle_simc_codes =
  industries_middle_df
  |> DataFrame.pull("simcCode")
  |> Series.to_list()

# One request per middle code; empty results are dropped before concatenating
industries_narrow_df =
  middle_simc_codes
  |> Enum.map(get_industries_narrow)
  |> Enum.reject(fn narrow_df -> DataFrame.n_rows(narrow_df) == 0 end)
  |> DataFrame.concat_rows()

Kino.DataTable.new(industries_narrow_df)

地域別特化係数の取得

# Requests /api/v1/industry/power/forArea and returns the "result" part of the
# response body.
#
#   year      - value sent as the `year` query parameter
#   pref_code - value sent as `prefCode`
#   area_type - value sent as `areaType`
#   disp_type - value sent as `dispType`
#   sic_code  - broad classification code sent as `sicCode`
#   simc_code - middle classification code sent as `simcCode`
#
# Raises if the HTTP request fails (Req.get!).
get_industry_power = fn year, pref_code, area_type, disp_type, sic_code, simc_code ->
  query =
    "?year=#{year}" <>
      "&prefCode=#{pref_code}" <>
      "&areaType=#{area_type}" <>
      "&dispType=#{disp_type}" <>
      "&sicCode=#{sic_code}" <>
      "&simcCode=#{simc_code}"

  "#{base_url}/api/v1/industry/power/forArea#{query}"
  |> Req.get!(headers: [auth_header])
  # The capture operator was HTML-escaped ("&amp;") in the source, which does not compile
  |> then(& &1.body["result"])
end
# Query year "2016" for the Oita prefecture code with areaType "1", dispType "1",
# broad code "A" and middle code "01"
industry_power = get_industry_power.("2016", oita_pref_code, "1", "1", "A", "01")

# The "prefectures" entry of the result becomes the data frame
prefecture_power_rows = industry_power["prefectures"]
prefectures_industry_power_df = DataFrame.new(prefecture_power_rows)

Kino.DataTable.new(prefectures_industry_power_df)
# Returns the values of column `col` of data frame `df` as a plain list
get_values = fn df, col ->
  Series.to_list(DataFrame.pull(df, col))
end
# Bar chart by prefecture
# Builds a bar chart with the "prefName" column on the x axis and column `col` on
# the y axis. `value_label` is used for the y-axis title and as the title suffix.
prefectures_bar = fn df, col, value_label ->
  chart_values = [x: get_values.(df, "prefName"), y: get_values.(df, col)]
  chart = VegaLite.new(width: 800, height: 400, title: "都道府県別#{value_label}")

  chart
  |> VegaLite.data_from_values(chart_values)
  |> VegaLite.mark(:bar)
  |> VegaLite.encode_field(:x, "x", type: :nominal, title: "都道府県")
  |> VegaLite.encode_field(:y, "y", type: :quantitative, title: value_label)
end

prefectures_bar.(prefectures_industry_power_df, "value", "農業特化係数")
# Collect the per-prefecture values of every middle classification into one data
# frame, then join the classification tables so their name columns are available.
# Start from the list of {sicCode, simcCode} pairs.
prefectures_industry_power_df =
  industries_middle_df
  |> DataFrame.select(["sicCode", "simcCode"])
  |> DataFrame.to_rows()
  # Request the values for each middle classification
  |> Enum.map(fn industry ->
    get_industry_power.(
      "2016",
      oita_pref_code,
      "1",
      "1",
      industry["sicCode"],
      industry["simcCode"]
    )
    |> Map.get("prefectures")
    # Drop entries that carry no data
    |> Enum.filter(&(&1 != nil))
    # Tag every record with the classification codes it was requested for
    |> Enum.map(fn datum ->
      Map.merge(datum, %{"sicCode" => industry["sicCode"], "simcCode" => industry["simcCode"]})
    end)
  end)
  # One data frame per middle classification
  |> Enum.map(&DataFrame.new(&1))
  # Drop classifications without any rows
  |> Enum.filter(&(DataFrame.n_rows(&1) > 0))
  # Stack all middle classifications into a single data frame
  |> DataFrame.concat_rows()
  # Join to bring in the broad and middle classification names
  |> DataFrame.join(industries_broad_df)
  |> DataFrame.join(industries_middle_df)

Kino.DataTable.new(prefectures_industry_power_df)
# Look up a middle classification name
# Returns the "simcName" of the first row of industries_middle_df whose "simcCode"
# equals `simc_code`.
get_simc_name = fn simc_code ->
  industries_middle_df
  |> DataFrame.filter_with(&Series.equal(&1["simcCode"], simc_code))
  |> DataFrame.pull("simcName")
  |> Series.first()
end
# Bar chart by prefecture for one middle classification
# Filters prefectures_industry_power_df to `simc_code` and draws its "value" column
# with prefectures_bar, labelled with the classification name.
prefectures_industry_bar = fn simc_code ->
  label = get_simc_name.(simc_code)

  prefectures_industry_power_df
  |> DataFrame.filter_with(&Series.equal(&1["simcCode"], simc_code))
  |> prefectures_bar.("value", "#{label}特化係数")
end

prefectures_industry_bar.("81")

prefectures_industry_bar.("50")

ピボット

# Reshape to one row per prefecture with one column per middle classification code
pivot_source_df =
  DataFrame.select(prefectures_industry_power_df, ["prefCode", "prefName", "simcCode", "value"])

pivot_df = DataFrame.pivot_wider(pivot_source_df, "simcCode", "value")

Kino.DataTable.new(pivot_df)
# Columns to fill: the distinct middle classification codes, which are the column
# names created by the pivot
distinct_simc_df = DataFrame.distinct(prefectures_industry_power_df, ["simcCode"])
cols = Series.to_list(DataFrame.pull(distinct_simc_df, "simcCode"))
# Fill missing values: replace missing entries with 0.0 in every column listed in cols
pivot_df =
  cols
  |> Enum.reduce(pivot_df, fn col, df ->
    # The capture operator was HTML-escaped ("&amp;") in the source, which does not compile
    DataFrame.mutate_with(df, &%{col => Series.fill_missing(&1[col], 0.0)})
  end)

Kino.DataTable.new(pivot_df)

散布図の表示

# Draws a square scatter plot of `y_col` against `x_col` with a fitted line on top.
#   df    - data frame holding both columns
#   x_col - column plotted on the x axis (also the axis title)
#   y_col - column plotted on the y axis (also the axis title)
#   size  - used for both the width and the height of the chart
scatter = fn df, x_col, y_col, size ->
  xs = get_values.(df, x_col)
  ys = get_values.(df, y_col)

  # Each axis spans exactly the observed value range
  x_scale = [domain: [Enum.min(xs), Enum.max(xs)]]
  y_scale = [domain: [Enum.min(ys), Enum.max(ys)]]

  point_layer = VegaLite.mark(VegaLite.new(), :point)

  # NOTE(review): this fits "x" on "y"; the usual choice is the y-axis field
  # regressed on the x-axis field — confirm this direction is intended.
  line_layer =
    VegaLite.new()
    |> VegaLite.mark(:line)
    |> VegaLite.transform(regression: "x", on: "y")

  VegaLite.new(width: size, height: size)
  |> VegaLite.data_from_values(x: xs, y: ys)
  |> VegaLite.encode_field(:x, "x", type: :quantitative, scale: x_scale, title: x_col)
  |> VegaLite.encode_field(:y, "y", type: :quantitative, scale: y_scale, title: y_col)
  |> VegaLite.layers([point_layer, line_layer])
end

scatter.(pivot_df, "01", "02", 300)

scatter.(pivot_df, "01", "80", 300)

一人当たり地方税の取得

# Requests /api/v1/municipality/taxes/perYear for `pref_code` with cityCode "-" and
# returns the "data" entry of the response's "result".
# Raises if the HTTP request fails (Req.get!).
get_local_tax = fn pref_code ->
  query = "?prefCode=#{pref_code}&cityCode=-"

  "#{base_url}/api/v1/municipality/taxes/perYear#{query}"
  |> Req.get!(headers: [auth_header])
  # The capture operator was HTML-escaped ("&amp;") in the source, which does not compile
  |> then(& &1.body["result"]["data"])
end
# Local tax records of every prefecture in one data frame, joined with the
# prefecture table so the other prefecture columns are available.
local_tax_df =
  prefectures_df
  |> DataFrame.pull("prefCode")
  |> Series.to_list()
  |> Enum.flat_map(fn pref_code ->
    pref_code
    |> get_local_tax.()
    # Tag each record with the prefecture it was requested for
    |> Enum.map(&Map.merge(&1, %{"prefCode" => pref_code}))
  end)
  |> DataFrame.new()
  |> DataFrame.join(prefectures_df)

Kino.DataTable.new(local_tax_df)
# Local tax rows of the Oita prefecture code, drawn as a line chart of value by year
target_df = DataFrame.filter_with(local_tax_df, &Series.equal(&1["prefCode"], oita_pref_code))

VegaLite.new(width: 600, height: 400)
|> VegaLite.data_from_values(target_df, only: ["year", "value"])
|> VegaLite.mark(:line)
|> VegaLite.encode_field(:x, "year", type: :quantitative)
|> VegaLite.encode_field(:y, "value", type: :quantitative)
# Bar chart of the local tax "value" column by prefecture for the rows whose year is 2019
local_tax_df
|> DataFrame.filter_with(&Series.equal(&1["year"], 2019))
|> prefectures_bar.("value", "一人当たり地方税")

人口構成の取得

# Requests /api/v1/population/composition/perYear for `pref_code` with cityCode "-"
# and returns the "data" entry of the response's "result".
# Raises if the HTTP request fails (Req.get!).
get_population_composition = fn pref_code ->
  query = "?prefCode=#{pref_code}&cityCode=-"

  "#{base_url}/api/v1/population/composition/perYear#{query}"
  |> Req.get!(headers: [auth_header])
  # The capture operator was HTML-escaped ("&amp;") in the source, which does not compile
  |> then(& &1.body["result"]["data"])
end
# Flatten the population composition of every prefecture into one row per record,
# tagging each record with the "label" of its group and the prefecture code.
population_pref_codes =
  prefectures_df
  |> DataFrame.pull("prefCode")
  |> Series.to_list()

population_rows =
  for pref_code <- population_pref_codes,
      group <- get_population_composition.(pref_code),
      datum <- group["data"] do
    Map.merge(datum, %{"label" => group["label"], "prefCode" => pref_code})
  end

# Join with the prefecture table so the other prefecture columns are available
population_composition_df =
  population_rows
  |> DataFrame.new()
  |> DataFrame.join(prefectures_df)

Kino.DataTable.new(population_composition_df)
# Population composition rows of the Oita prefecture code, drawn as one line per label
target_df =
  population_composition_df
  |> DataFrame.filter_with(&Series.equal(&1["prefCode"], oita_pref_code))

VegaLite.new(width: 700, height: 400, title: "大分県人口構成推移")
|> VegaLite.data_from_values(target_df, only: ["year", "value", "label"])
|> VegaLite.mark(:line)
|> VegaLite.encode_field(:x, "year", type: :quantitative)
|> VegaLite.encode_field(:y, "value", type: :quantitative)
|> VegaLite.encode_field(:color, "label", type: :nominal)
# Rows for year 2015, excluding the "総人口" label, drawn as bars per prefecture
# colored by label
target_df =
  population_composition_df
  |> DataFrame.filter_with(&Series.equal(&1["year"], 2015))
  |> DataFrame.filter_with(&Series.not_equal(&1["label"], "総人口"))

VegaLite.new(width: 650, height: 400, title: "2015年都道府県別人口構成")
|> VegaLite.data_from_values(target_df, only: ["prefName", "value", "label"])
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "prefName", type: :nominal)
|> VegaLite.encode_field(:y, "value", type: :quantitative)
|> VegaLite.encode_field(:color, "label", type: :nominal)
# 2015 population per prefecture with one column per label, plus the ratio
# 老年人口 / 総人口 stored in the "老年率" column
population_df =
  population_composition_df
  |> DataFrame.filter_with(fn ldf -> Series.equal(ldf["year"], 2015) end)
  # Cast to float before the division below
  |> DataFrame.mutate_with(fn ldf -> %{"value" => Series.cast(ldf["value"], :float)} end)
  |> DataFrame.select(["prefCode", "prefName", "label", "value"])
  |> DataFrame.pivot_wider("label", "value")
  |> DataFrame.mutate_with(fn ldf ->
    %{"老年率" => Series.divide(ldf["老年人口"], ldf["総人口"])}
  end)

Kino.DataTable.new(population_df)

prefectures_bar.(population_df, "老年率", "老年率")

相関係数の取得

# 2015 local tax per prefecture (renamed to "localTax"), joined with the population
# table and the pivoted industry table
local_tax_2015_df =
  local_tax_df
  |> DataFrame.filter_with(fn ldf -> Series.equal(ldf["year"], 2015) end)
  |> DataFrame.mutate_with(fn ldf -> %{"value" => Series.cast(ldf["value"], :float)} end)
  |> DataFrame.select(["prefCode", "value"])
  # Positional rename: "value" becomes "localTax"
  |> DataFrame.rename(["prefCode", "localTax"])

target_df =
  local_tax_2015_df
  |> DataFrame.join(population_df)
  |> DataFrame.join(pivot_df)

Kino.DataTable.new(target_df)
# localTax against column "01", for all rows and then without prefCode 13
# (presumably Tokyo, excluded as an outlier — confirm)
scatter.(target_df, "localTax", "01", 300)

target_df
|> DataFrame.filter_with(&Series.not_equal(&1["prefCode"], 13))
|> scatter.("localTax", "01", 300)

# localTax against 総人口, for all rows and then without prefCode 13
scatter.(target_df, "localTax", "総人口", 300)

target_df
|> DataFrame.filter(prefCode != 13)
|> scatter.("localTax", "総人口", 300)
# Columns to analyse: the tax and population columns first, followed by every
# distinct middle classification code
simc_code_cols =
  prefectures_industry_power_df
  |> DataFrame.distinct(["simcCode"])
  |> DataFrame.pull("simcCode")
  |> Series.to_list()

cols = ["localTax", "総人口", "年少人口", "生産年齢人口", "老年人口", "老年率"] ++ simc_code_cols
# Returns `df` with `column` replaced by (value - mean) / standard deviation, where
# both statistics are computed from that column of `df`.
standardize = fn df, column ->
  series = DataFrame.pull(df, column)
  mean = Series.mean(series)
  std = Series.standard_deviation(series)

  DataFrame.mutate_with(df, fn in_df ->
    centered = Series.subtract(in_df[column], mean)
    %{column => Series.divide(centered, std)}
  end)
end
# Standardize every column in cols, threading the data frame through the reduction
standardized_df = Enum.reduce(cols, target_df, &standardize.(&2, &1))

Kino.DataTable.new(standardized_df)
# Converts the data frame `df` into a tensor of shape {n_columns, n_rows}: one row
# of the tensor per column of `df`, in the order given by DataFrame.names/1.
df_to_tensor = fn df ->
  df
  |> DataFrame.names()
  |> Enum.map(fn col ->
    # Read from the argument itself; the original read the outer `standardized_df`,
    # so the function ignored the data of whatever frame it was given
    df
    |> DataFrame.pull(col)
    |> Series.to_tensor()
  end)
  |> Nx.concatenate()
  |> Nx.reshape({DataFrame.n_columns(df), DataFrame.n_rows(df)})
end

# Transposed so the tensor has shape {n_rows, n_columns}, one column per entry of cols
standardized_tensor =
  standardized_df
  |> DataFrame.select(cols)
  |> df_to_tensor.()
  |> Nx.transpose()
# Covariance matrix of the standardized columns: transpose(X) . X / (n - 1).
# The columns were standardized with Series.standard_deviation/1, whose default is
# the sample standard deviation (ddof = 1), so the divisor must be n - 1 for the
# diagonal to be 1 and the entries to be correlation coefficients; dividing by n
# scaled every entry by (n - 1) / n.
covariance_tensor =
  standardized_tensor
  |> Nx.transpose()
  |> Nx.dot(standardized_tensor)
  |> Nx.divide(DataFrame.n_rows(standardized_df) - 1)
# Prepends an {"x", cols_} entry to `list`, so the column names become a leading
# "x" column
add_cols_label = fn list, cols_ ->
  [{"x", cols_}] ++ list
end

# One {column name, matrix row as list} pair per entry of cols, preceded by the "x"
# label column, displayed as a table
covariance_columns =
  for {col, index} <- Enum.with_index(cols) do
    {col, Nx.to_flat_list(covariance_tensor[index])}
  end

covariance_df =
  covariance_columns
  |> add_cols_label.(cols)
  |> DataFrame.new()

Kino.DataTable.new(covariance_df, keys: ["x" | cols])
# One heatmap cell per column: row 0 of the matrix ("localTax" is the first entry
# of cols) against every column
local_tax_heatmap =
  for {col_1, index_1} <- Enum.with_index(cols) do
    %{
      x: "localTax",
      y: col_1,
      covariance: Nx.to_number(covariance_tensor[0][index_1])
    }
  end

local_tax_fill_scale = [domain: [-1, 1], scheme: :blueorange]

VegaLite.new(width: 100, height: 1600)
|> VegaLite.data_from_values(local_tax_heatmap)
|> VegaLite.mark(:rect)
|> VegaLite.encode_field(:x, "x", type: :nominal)
|> VegaLite.encode_field(:y, "y", type: :nominal)
|> VegaLite.encode_field(:fill, "covariance", type: :quantitative, scale: local_tax_fill_scale)
# Communications
scatter.(target_df, "localTax", "37", 300)

prefectures_industry_bar.("37")

# Financial products transaction dealers and commodity futures dealers
# (rows with prefCode 13 are excluded from the scatter plot)
target_df
|> DataFrame.filter_with(&Series.not_equal(&1["prefCode"], 13))
|> scatter.("localTax", "65", 300)

prefectures_industry_bar.("65")

# Medical services (rows with prefCode 13 are excluded from the scatter plot)
target_df
|> DataFrame.filter_with(&Series.not_equal(&1["prefCode"], 13))
|> scatter.("localTax", "83", 300)

prefectures_industry_bar.("83")

# Social insurance, social welfare and care services
target_df
|> scatter.("localTax", "85", 300)

prefectures_industry_bar.("85")
# One heatmap cell per column: row 5 of the matrix ("老年率" is the sixth entry of
# cols) against every column
elderly_heatmap =
  for {col_1, index_1} <- Enum.with_index(cols) do
    %{
      x: "老年率",
      y: col_1,
      covariance: Nx.to_number(covariance_tensor[5][index_1])
    }
  end

elderly_fill_scale = [domain: [-1, 1], scheme: :blueorange]

VegaLite.new(width: 100, height: 1600)
|> VegaLite.data_from_values(elderly_heatmap)
|> VegaLite.mark(:rect)
|> VegaLite.encode_field(:x, "x", type: :nominal)
|> VegaLite.encode_field(:y, "y", type: :nominal)
|> VegaLite.encode_field(:fill, "covariance", type: :quantitative, scale: elderly_fill_scale)
# Real estate agencies
# The original line began with a pipe that had no left-hand side while also passing
# target_df explicitly, which is a syntax error; call scatter directly instead.
scatter.(target_df, "老年率", "68", 300)

prefectures_industry_bar.("68")