Qiita 記事データ分析
Mix.install([
{:req, "~> 0.5"},
{:explorer, "~> 0.9"},
{:kino, "~> 0.14"},
{:kino_vega_lite, "~> 0.1"}
])
情報の設定
# Password-style input widget where the user enters their Qiita access token.
token_input = Kino.Input.password("TOKEN")
# Root URL of the Qiita API v2, used by the ad-hoc request made outside the Qiita module.
base_url = "https://qiita.com/api/v2"
alias Explorer.DataFrame
alias Explorer.Series
# Required so the DataFrame query forms used later (filter, sort_by, summarise) can be called.
require Explorer.DataFrame
# Bearer-token header tuple built from the current value of the input widget;
# it is passed to every Req call in this notebook.
auth_header = {"Authorization", "Bearer #{Kino.Input.read(token_input)}"}
# Render no output for this step (presumably so the token-bearing header is not echoed).
Kino.nothing()
記事一覧を取得する
# Request the authenticated user's articles without a page parameter
# (so whatever page the API serves by default) and count the returned items.
items_url = "#{base_url}/authenticated_user/items"
articles = Req.get!(items_url, headers: [auth_header]).body
Enum.count(articles)
再帰的に全件取得するため、モジュールを定義する
defmodule Qiita do
  @moduledoc """
  Thin client for the Qiita API v2 endpoints used in this notebook.
  """

  @base_url "https://qiita.com/api/v2"

  @doc """
  Fetches one page of the authenticated user's articles.

  ## Parameters

    - page: page number
    - auth_header: authorization header tuple

  Returns the response body as provided by `Req.get!/2` (a list of article
  maps on success). Raises if the request itself fails.
  """
  @spec get_articles(integer, tuple) :: list
  def get_articles(page, auth_header) do
    "#{@base_url}/authenticated_user/items?page=#{page}"
    |> Req.get!(headers: [auth_header])
    |> Map.get(:body)
  end

  @doc """
  Fetches articles page by page, starting at `page`, until an empty page is returned.

  ## Parameters

    - page: page number to start from
    - auth_header: authorization header tuple

  Returns all articles from the visited pages, in page order.

  Raises `ArgumentError` when a page's body is not a list (for example an API
  error payload). Without this check such a response would never match the
  stop condition and the function would keep requesting pages forever.
  """
  @spec get_articles_cyclic(integer, tuple) :: list
  def get_articles_cyclic(page, auth_header) do
    fetch_pages(page, auth_header, [])
  end

  @doc """
  Fetches every article of the authenticated user, starting from page 1.

  ## Parameters

    - auth_header: authorization header tuple
  """
  @spec get_all_articles(tuple) :: list
  def get_all_articles(auth_header) do
    get_articles_cyclic(1, auth_header)
  end

  # Tail-recursive page loop. Pages are prepended to `pages` (newest first) and
  # put back in order once at the end, avoiding repeated `++` on a growing list.
  defp fetch_pages(page, auth_header, pages) do
    IO.inspect("get page #{page}")

    case get_articles(page, auth_header) do
      # An empty page means there is nothing left to fetch.
      [] ->
        IO.inspect("stop")
        pages |> Enum.reverse() |> Enum.concat()

      # A non-empty page: keep it and move on to the next one.
      articles when is_list(articles) ->
        fetch_pages(page + 1, auth_header, [articles | pages])

      # Anything else is not an article list; stop instead of looping forever.
      other ->
        raise ArgumentError,
              "unexpected response body for page #{page}: #{inspect(other)}"
    end
  end
end
# Fetch a single page (page 1) of articles.
Qiita.get_articles(1, auth_header)
# Fetch every page of articles and keep the combined list for the analysis below.
all_articles = Qiita.get_all_articles(auth_header)
# Total number of articles retrieved.
Enum.count(all_articles)
記事一覧をデータフレーム化する
# Ratio of `count` to `views`. Falls back to 0.0 when either value is missing
# or the article has no views yet, because dividing by zero would raise
# ArithmeticError and abort the whole data frame construction. Using 0.0
# (rather than nil) keeps the column a plain float column for sorting.
rate = fn
  count, views when is_number(count) and is_number(views) and views > 0 -> count / views
  _count, _views -> 0.0
end

# Build one row per article with the columns used by the analysis below.
qiita_df =
  all_articles
  |> Enum.map(fn item ->
    views = item["page_views_count"]

    %{
      "title" => item["title"],
      # Private (limited-share) flag
      "private" => item["private"],
      # Creation timestamp, converted to NaiveDateTime
      "created_at" => NaiveDateTime.from_iso8601!(item["created_at"]),
      # Page views
      "page_views_count" => views,
      # Likes
      "likes_count" => item["likes_count"],
      # Like rate = likes / page views
      "likes_rate" => rate.(item["likes_count"], views),
      # Stocks
      "stocks_count" => item["stocks_count"],
      # Stock rate = stocks / page views
      "stocks_rate" => rate.(item["stocks_count"], views),
      # An article can have several tags; join their names with ","
      "tags" => Enum.map_join(item["tags"], ",", & &1["name"]),
      # Article length in characters
      "length" => String.length(item["body"])
    }
  end)
  |> DataFrame.new()
  # Select explicitly to fix the column order shown in the table.
  |> DataFrame.select([
    "title",
    "private",
    "created_at",
    "page_views_count",
    "likes_count",
    "likes_rate",
    "stocks_count",
    "stocks_rate",
    "tags",
    "length"
  ])

Kino.DataTable.new(qiita_df)
記事一覧を分析する
# Summary statistics of the count columns, restricted to non-private articles.
qiita_df
|> DataFrame.filter(private == false)
|> DataFrame.select(~w(page_views_count likes_count stocks_count))
|> DataFrame.describe()
|> Kino.DataTable.new()

# Articles ordered by page views, highest first.
qiita_df
|> DataFrame.sort_by(desc: page_views_count)
|> DataFrame.select(~w(title page_views_count likes_count stocks_count))
|> Kino.DataTable.new()

# Articles ordered by likes, highest first.
qiita_df
|> DataFrame.sort_by(desc: likes_count)
|> DataFrame.select(~w(title likes_count page_views_count stocks_count))
|> Kino.DataTable.new()

# Articles ordered by stocks, highest first.
qiita_df
|> DataFrame.sort_by(desc: stocks_count)
|> DataFrame.select(~w(title stocks_count likes_count page_views_count))
|> Kino.DataTable.new()

# Articles ordered by like rate, highest first.
qiita_df
|> DataFrame.sort_by(desc: likes_rate)
|> DataFrame.select(~w(title likes_rate likes_count page_views_count))
|> Kino.DataTable.new()

# Articles ordered by stock rate, highest first.
qiita_df
|> DataFrame.sort_by(desc: stocks_rate)
|> DataFrame.select(~w(title stocks_rate stocks_count page_views_count))
|> Kino.DataTable.new()
グラフ化する
# Returns the values of column `col` of data frame `df` as a plain list.
get_values = fn df, col ->
  column = DataFrame.pull(df, col)
  Series.to_list(column)
end
# Bar chart of page views per article; bars are ordered by page views, descending.
titles = get_values.(qiita_df, "title")
view_counts = get_values.(qiita_df, "page_views_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: titles, y: view_counts)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "x",
  type: :nominal,
  title: "title",
  sort: %{"field" => "y", "order" => "descending"}
)
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "page_views_count")
# Bar chart of likes per article; bars are ordered by likes, descending.
titles = get_values.(qiita_df, "title")
like_counts = get_values.(qiita_df, "likes_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: titles, y: like_counts)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "x",
  type: :nominal,
  title: "title",
  sort: %{"field" => "y", "order" => "descending"}
)
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "likes_count")
# Line chart of page views against each article's creation time.
created_times = get_values.(qiita_df, "created_at")
view_counts = get_values.(qiita_df, "page_views_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: created_times, y: view_counts)
|> VegaLite.mark(:line)
|> VegaLite.encode_field(:x, "x", type: :temporal, title: "created_at")
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "page_views_count")
# Line chart of stocks against article length (in characters).
x = get_values.(qiita_df, "length")
y = get_values.(qiita_df, "stocks_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: x, y: y)
|> VegaLite.mark(:line)
|> VegaLite.encode_field(
  :x,
  "x",
  type: :quantitative,
  title: "length"
)
|> VegaLite.encode_field(
  :y,
  "y",
  type: :quantitative,
  # The plotted values are stocks_count, so the axis is labelled accordingly
  # (it was previously labelled "likes_count").
  title: "stocks_count"
)
タグを分析する
# One row per (article, tag) pair, so an article with several tags appears
# once for each of its tags.
tag_rows =
  for item <- all_articles, tag <- item["tags"] do
    %{
      "tag" => tag["name"],
      "title" => item["title"],
      "page_views_count" => item["page_views_count"],
      "likes_count" => item["likes_count"],
      "stocks_count" => item["stocks_count"]
    }
  end

qiita_tag_df =
  tag_rows
  |> DataFrame.new()
  |> DataFrame.select(~w(title tag page_views_count likes_count stocks_count))

Kino.DataTable.new(qiita_tag_df)
# Aggregate the per-(article, tag) rows into one row per tag.
qiita_tag_summarised_df =
qiita_tag_df
|> DataFrame.group_by(["tag"])
|> DataFrame.summarise(
# Count of page_views_count entries within the tag, used as the number of articles
articles_count: count(page_views_count),
# Sum and mean of page views, likes and stocks within the tag
page_views_count_sum: sum(page_views_count),
page_views_count_mean: mean(page_views_count),
likes_count_sum: sum(likes_count),
likes_count_mean: mean(likes_count),
stocks_count_sum: sum(stocks_count),
stocks_count_mean: mean(stocks_count)
)
# Tags with the most total page views come first.
|> DataFrame.sort_by(desc: page_views_count_sum)
Kino.DataTable.new(qiita_tag_summarised_df)
# Bar chart of the number of articles per tag; bars are ordered by count, descending.
tag_names = get_values.(qiita_tag_summarised_df, "tag")
article_counts = get_values.(qiita_tag_summarised_df, "articles_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: tag_names, y: article_counts)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "x",
  type: :nominal,
  title: "tag",
  sort: %{"field" => "y", "order" => "descending"}
)
|> VegaLite.encode_field(:y, "y", type: :quantitative, title: "count")
# Builds a per-tag bar chart for the summary column named "<col>_<agg>"
# (for example "likes_count" and "mean"); bars are ordered by value, descending.
plot_tag_bar = fn col, agg ->
  column = "#{col}_#{agg}"
  tag_names = get_values.(qiita_tag_summarised_df, "tag")
  column_values = get_values.(qiita_tag_summarised_df, column)

  VegaLite.new(width: 800, height: 400)
  |> VegaLite.data_from_values(x: tag_names, y: column_values)
  |> VegaLite.mark(:bar)
  |> VegaLite.encode_field(:x, "x",
    type: :nominal,
    title: "tag",
    sort: %{"field" => "y", "order" => "descending"}
  )
  |> VegaLite.encode_field(:y, "y", type: :quantitative, title: column)
end
# Per-tag charts: total and mean page views.
plot_tag_bar.("page_views_count", "sum")
plot_tag_bar.("page_views_count", "mean")
# Per-tag charts: total and mean likes.
plot_tag_bar.("likes_count", "sum")
plot_tag_bar.("likes_count", "mean")
# Per-tag charts: total and mean stocks.
plot_tag_bar.("stocks_count", "sum")
plot_tag_bar.("stocks_count", "mean")