RESAS から産業構造を取得する
Mix.install([
{:req, "~> 0.5"},
{:explorer, "~> 0.9"},
{:nx, "~> 0.9"},
{:kino, "~> 0.14"},
{:kino_vega_lite, "~> 0.1"}
])
情報の設定
このノートブックはRESAS(地域経済分析システム)のデータを加工して作成しています
# RESAS のAPIキーを入力する
api_key_input = Kino.Input.password("API_KEY")
base_url = "https://opendata.resas-portal.go.jp"
alias Explorer.DataFrame
alias Explorer.Series
require Explorer.DataFrame
RESAS の認証
auth_header = {"X-API-KEY", Kino.Input.read(api_key_input)}
Kino.nothing()
都道府県一覧の取得
prefectures_url = "#{base_url}/api/v1/prefectures"
prefectures_df =
prefectures_url
|> Req.get!(headers: [auth_header])
|> then(&DataFrame.new(&1.body["result"]))
prefectures_df
|> Kino.DataTable.new(sorting_enabled: true)
oita_pref_code =
prefectures_df
|> DataFrame.filter_with(&Series.equal(&1["prefName"], "大分県"))
|> DataFrame.pull("prefCode")
|> Series.first()
産業一覧の取得
# 大分類
industries_broad_df =
"#{base_url}/api/v1/industries/broad"
|> Req.get!(headers: [auth_header])
|> then(&DataFrame.new(&1.body["result"]))
Kino.DataTable.new(industries_broad_df)
# 中分類
get_industries_middle = fn sic_code ->
"#{base_url}/api/v1/industries/middle?sicCode=#{sic_code}"
|> Req.get!(headers: [auth_header])
|> then(&DataFrame.new(&1.body["result"]))
end
industries_middle_df =
industries_broad_df
|> DataFrame.pull("sicCode")
|> Series.to_list()
|> Enum.map(&get_industries_middle.(&1))
|> Enum.filter(&(DataFrame.n_rows(&1) > 0))
|> DataFrame.concat_rows()
Kino.DataTable.new(industries_middle_df)
# 小分類
get_industries_narrow = fn simc_code ->
"#{base_url}/api/v1/industries/narrow?simcCode=#{simc_code}"
|> Req.get!(headers: [auth_header])
|> then(&DataFrame.new(&1.body["result"]))
end
industries_narrow_df =
industries_middle_df
|> DataFrame.pull("simcCode")
|> Series.to_list()
|> Enum.map(&get_industries_narrow.(&1))
|> Enum.filter(&(DataFrame.n_rows(&1) > 0))
|> DataFrame.concat_rows()
Kino.DataTable.new(industries_narrow_df)
地域別特化係数の取得
get_industry_power = fn year, pref_code, area_type, disp_type, sic_code, simc_code ->
query =
"?year=#{year}" <>
"&prefCode=#{pref_code}" <>
"&areaType=#{area_type}" <>
"&dispType=#{disp_type}" <>
"&sicCode=#{sic_code}" <>
"&simcCode=#{simc_code}"
"#{base_url}/api/v1/industry/power/forArea#{query}"
|> Req.get!(headers: [auth_header])
|> then(& &1.body["result"])
end
industry_power = get_industry_power.("2016", oita_pref_code, "1", "1", "A", "01")
prefectures_industry_power_df = DataFrame.new(industry_power["prefectures"])
Kino.DataTable.new(prefectures_industry_power_df)
get_values = fn df, col ->
df
|> DataFrame.pull(col)
|> Series.to_list()
end
# 都道府県別棒グラフ
prefectures_bar = fn df, col, value_label ->
x = get_values.(df, "prefName")
y = get_values.(df, col)
VegaLite.new(width: 800, height: 400, title: "都道府県別#{value_label}")
|> VegaLite.data_from_values(x: x, y: y)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "x", type: :nominal, title: "都道府県")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: value_label)
end
prefectures_bar.(prefectures_industry_power_df, "value", "農業特化係数")
# 中分類コードリストを取得
prefectures_industry_power_df =
industries_middle_df
|> DataFrame.select(["sicCode", "simcCode"])
|> DataFrame.to_rows()
# 中分類毎に特化係数を取得
|> Enum.map(fn industry ->
get_industry_power.(
"2016",
oita_pref_code,
"1",
"1",
industry["sicCode"],
industry["simcCode"]
)
|> Map.get("prefectures")
# 該当データがないものは除去する
|> Enum.filter(&(&1 != nil))
# データを整形する
|> Enum.map(fn datum ->
Map.merge(datum, %{"sicCode" => industry["sicCode"], "simcCode" => industry["simcCode"]})
end)
end)
# 中分類毎にデータフレーム化する
|> Enum.map(&DataFrame.new(&1))
# 該当データがないものは除去する
|> Enum.filter(&(DataFrame.n_rows(&1) > 0))
# 全中分類のデータフレームを結合する
|> DataFrame.concat_rows()
# 大分類、中分類名を表示するために結合する
|> DataFrame.join(industries_broad_df)
|> DataFrame.join(industries_middle_df)
Kino.DataTable.new(prefectures_industry_power_df)
# 中分類名取得
get_simc_name = fn simc_code ->
industries_middle_df
|> DataFrame.filter_with(&Series.equal(&1["simcCode"], simc_code))
|> DataFrame.pull("simcName")
|> Series.first()
end
# 都道府県別産業構造中分類棒グラフ
prefectures_industry_bar = fn simc_code ->
label = get_simc_name.(simc_code)
prefectures_industry_power_df
|> DataFrame.filter_with(&Series.equal(&1["simcCode"], simc_code))
|> prefectures_bar.("value", "#{label}特化係数")
end
prefectures_industry_bar.("81")
prefectures_industry_bar.("50")
ピボット
pivot_df =
prefectures_industry_power_df
|> DataFrame.select(["prefCode", "prefName", "simcCode", "value"])
|> DataFrame.pivot_wider("simcCode", "value")
Kino.DataTable.new(pivot_df)
# 補完対象の列
cols =
prefectures_industry_power_df
|> DataFrame.distinct(["simcCode"])
|> DataFrame.pull("simcCode")
|> Series.to_list()
# 欠損値の補完
pivot_df =
cols
|> Enum.reduce(pivot_df, fn col, df ->
DataFrame.mutate_with(df, &%{col => Series.fill_missing(&1[col], 0.0)})
end)
Kino.DataTable.new(pivot_df)
散布図の表示
scatter = fn df, x_col, y_col, size ->
x = get_values.(df, x_col)
y = get_values.(df, y_col)
VegaLite.new(width: size, height: size)
|> VegaLite.data_from_values(x: x, y: y)
|> VegaLite.encode_field(:x, "x",
type: :quantitative,
scale: [domain: [Enum.min(x), Enum.max(x)]],
title: x_col
)
|> VegaLite.encode_field(:y, "y",
type: :quantitative,
scale: [domain: [Enum.min(y), Enum.max(y)]],
title: y_col
)
|> VegaLite.layers([
VegaLite.new()
|> VegaLite.mark(:point),
VegaLite.new()
|> VegaLite.mark(:line)
|> VegaLite.transform(regression: "x", on: "y")
])
end
scatter.(pivot_df, "01", "02", 300)
scatter.(pivot_df, "01", "80", 300)
一人当たり地方税の取得
get_local_tax = fn pref_code ->
query = "?prefCode=#{pref_code}&cityCode=-"
"#{base_url}/api/v1/municipality/taxes/perYear#{query}"
|> Req.get!(headers: [auth_header])
|> then(& &1.body["result"]["data"])
end
local_tax_df =
prefectures_df
|> DataFrame.pull("prefCode")
|> Series.to_list()
|> Enum.flat_map(fn pref_code ->
pref_code
|> get_local_tax.()
|> Enum.map(&Map.merge(&1, %{"prefCode" => pref_code}))
end)
|> DataFrame.new()
|> DataFrame.join(prefectures_df)
Kino.DataTable.new(local_tax_df)
target_df = DataFrame.filter_with(local_tax_df, &Series.equal(&1["prefCode"], oita_pref_code))
VegaLite.new(width: 600, height: 400)
|> VegaLite.data_from_values(target_df, only: ["year", "value"])
|> VegaLite.mark(:line)
|> VegaLite.encode_field(:x, "year", type: :quantitative)
|> VegaLite.encode_field(:y, "value", type: :quantitative)
local_tax_df
|> DataFrame.filter_with(&Series.equal(&1["year"], 2019))
|> prefectures_bar.("value", "一人当たり地方税")
人口構成の取得
get_population_composition = fn pref_code ->
query = "?prefCode=#{pref_code}&cityCode=-"
"#{base_url}/api/v1/population/composition/perYear#{query}"
|> Req.get!(headers: [auth_header])
|> then(& &1.body["result"]["data"])
end
population_composition_df =
prefectures_df
|> DataFrame.pull("prefCode")
|> Series.to_list()
|> Enum.flat_map(fn pref_code ->
pref_code
|> get_population_composition.()
|> Enum.flat_map(fn data ->
data["data"]
|> Enum.map(fn datum ->
Map.merge(datum, %{"label" => data["label"], "prefCode" => pref_code})
end)
end)
end)
|> DataFrame.new()
|> DataFrame.join(prefectures_df)
Kino.DataTable.new(population_composition_df)
target_df =
population_composition_df
|> DataFrame.filter_with(&Series.equal(&1["prefCode"], oita_pref_code))
VegaLite.new(width: 700, height: 400, title: "大分県人口構成推移")
|> VegaLite.data_from_values(target_df, only: ["year", "value", "label"])
|> VegaLite.mark(:line)
|> VegaLite.encode_field(:x, "year", type: :quantitative)
|> VegaLite.encode_field(:y, "value", type: :quantitative)
|> VegaLite.encode_field(:color, "label", type: :nominal)
target_df =
population_composition_df
|> DataFrame.filter_with(&Series.equal(&1["year"], 2015))
|> DataFrame.filter_with(&Series.not_equal(&1["label"], "総人口"))
VegaLite.new(width: 650, height: 400, title: "2015年都道府県別人口構成")
|> VegaLite.data_from_values(target_df, only: ["prefName", "value", "label"])
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "prefName", type: :nominal)
|> VegaLite.encode_field(:y, "value", type: :quantitative)
|> VegaLite.encode_field(:color, "label", type: :nominal)
population_df =
population_composition_df
|> DataFrame.filter(year == 2015)
|> DataFrame.mutate(value: cast(value, :float))
|> DataFrame.select(["prefCode", "prefName", "label", "value"])
|> DataFrame.pivot_wider("label", "value")
|> DataFrame.mutate(老年率: 老年人口 / 総人口)
Kino.DataTable.new(population_df)
prefectures_bar.(population_df, "老年率", "老年率")
相関係数の取得
target_df =
local_tax_df
|> DataFrame.filter(year == 2015)
|> DataFrame.mutate(value: cast(value, :float))
|> DataFrame.select(["prefCode", "value"])
|> DataFrame.rename(["prefCode", "localTax"])
|> DataFrame.join(population_df)
|> DataFrame.join(pivot_df)
Kino.DataTable.new(target_df)
scatter.(target_df, "localTax", "01", 300)
target_df
|> DataFrame.filter_with(&Series.not_equal(&1["prefCode"], 13))
|> scatter.("localTax", "01", 300)
scatter.(target_df, "localTax", "総人口", 300)
target_df
|> DataFrame.filter(prefCode != 13)
|> scatter.("localTax", "総人口", 300)
cols =
prefectures_industry_power_df
|> DataFrame.distinct(["simcCode"])
|> DataFrame.pull("simcCode")
|> Series.to_list()
cols = ["localTax", "総人口", "年少人口", "生産年齢人口", "老年人口", "老年率"] ++ cols
standardize = fn df, column ->
mean =
df
|> DataFrame.pull(column)
|> Series.mean()
std =
df
|> DataFrame.pull(column)
|> Series.standard_deviation()
df
|> DataFrame.mutate_with(fn in_df ->
%{column => Series.subtract(in_df[column], mean)}
end)
|> DataFrame.mutate_with(fn in_df ->
%{column => Series.divide(in_df[column], std)}
end)
end
standardized_df =
cols
|> Enum.reduce(target_df, fn col, df ->
standardize.(df, col)
end)
Kino.DataTable.new(standardized_df)
df_to_tensor = fn df ->
df
|> DataFrame.names()
|> Enum.map(fn col ->
standardized_df
|> DataFrame.pull(col)
|> Series.to_tensor()
end)
|> Nx.concatenate()
|> Nx.reshape({DataFrame.n_columns(df), DataFrame.n_rows(df)})
end
standardized_tensor =
standardized_df
|> DataFrame.select(cols)
|> df_to_tensor.()
|> Nx.transpose()
covariance_tensor =
standardized_tensor
|> Nx.transpose()
|> Nx.dot(standardized_tensor)
|> Nx.divide(DataFrame.n_rows(standardized_df))
add_cols_label = fn list, cols_ ->
[{"x", cols_} | list]
end
covariance_df =
cols
|> Stream.with_index()
|> Enum.map(fn {col, index} ->
{col, Nx.to_flat_list(covariance_tensor[index])}
end)
|> add_cols_label.(cols)
|> DataFrame.new()
covariance_df
|> Kino.DataTable.new(keys: ["x" | cols])
local_tax_heatmap =
cols
|> Stream.with_index()
|> Enum.map(fn {col_1, index_1} ->
%{
x: "localTax",
y: col_1,
covariance: Nx.to_number(covariance_tensor[0][index_1])
}
end)
|> List.flatten()
VegaLite.new(width: 100, height: 1600)
|> VegaLite.data_from_values(local_tax_heatmap)
|> VegaLite.mark(:rect)
|> VegaLite.encode_field(:x, "x", type: :nominal)
|> VegaLite.encode_field(:y, "y", type: :nominal)
|> VegaLite.encode_field(
:fill,
"covariance",
type: :quantitative,
scale: [
domain: [-1, 1],
scheme: :blueorange
]
)
# 通信業
scatter.(target_df, "localTax", "37", 300)
prefectures_industry_bar.("37")
# 金融商品取引業,商品先物取引業
target_df
|> DataFrame.filter_with(&Series.not_equal(&1["prefCode"], 13))
|> scatter.("localTax", "65", 300)
prefectures_industry_bar.("65")
# 医療業
target_df
|> DataFrame.filter_with(&Series.not_equal(&1["prefCode"], 13))
|> scatter.("localTax", "83", 300)
prefectures_industry_bar.("83")
# 社会保険・社会福祉・介護事業
target_df
|> scatter.("localTax", "85", 300)
prefectures_industry_bar.("85")
elderly_heatmap =
cols
|> Stream.with_index()
|> Enum.map(fn {col_1, index_1} ->
%{
x: "老年率",
y: col_1,
covariance: Nx.to_number(covariance_tensor[5][index_1])
}
end)
|> List.flatten()
VegaLite.new(width: 100, height: 1600)
|> VegaLite.data_from_values(elderly_heatmap)
|> VegaLite.mark(:rect)
|> VegaLite.encode_field(:x, "x", type: :nominal)
|> VegaLite.encode_field(:y, "y", type: :nominal)
|> VegaLite.encode_field(
:fill,
"covariance",
type: :quantitative,
scale: [
domain: [-1, 1],
scheme: :blueorange
]
)
# 不動産取引業
|> scatter.(target_df, "老年率", "68", 300)
prefectures_industry_bar.("68")