Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Qiita 記事データ分析

livebooks/qiita/my_articles.livemd

Qiita 記事データ分析

Mix.install([
  {:req, "~> 0.5"},
  {:explorer, "~> 0.9"},
  {:kino, "~> 0.14"},
  {:kino_vega_lite, "~> 0.1"}
])

情報の設定

# Qiita のアクセストークンを入力する
token_input = Kino.Input.password("TOKEN")
base_url = "https://qiita.com/api/v2"
alias Explorer.DataFrame
alias Explorer.Series
require Explorer.DataFrame
auth_header = {"Authorization", "Bearer #{Kino.Input.read(token_input)}"}
Kino.nothing()

記事一覧を取得する

articles =
  "#{base_url}/authenticated_user/items"
  |> Req.get!(headers: [auth_header])
  |> Map.get(:body)
Enum.count(articles)

再帰的に全件取得するため、モジュールを定義する

defmodule Qiita do
  @moduledoc """
  Qiita API を呼び出す
  """

  @base_url "https://qiita.com/api/v2"

  @doc """
  1ページ分の記事一覧を取得する

  ## パラメータ

    - page: ページ番号
    - auth_header: 認証ヘッダー

  """
  @spec get_articles(integer, tuple) :: list
  def get_articles(page, auth_header) do
    "#{@base_url}/authenticated_user/items?page=#{page}"
    |> Req.get!(headers: [auth_header])
    |> Map.get(:body)
  end

  @doc """
  再帰的に記事一覧を取得する

  ## パラメータ

    - page: ページ番号
    - auth_header: 認証ヘッダー

  """
  @spec get_articles_cyclic(integer, tuple) :: list
  def get_articles_cyclic(page, auth_header) do
    IO.inspect("get page #{page}")
    articles = get_articles(page, auth_header)

    case articles do
      # 空であれば次ページを取得しない
      [] ->
        IO.inspect("stop")
        articles

      # 空以外の場合は次ページを取得する
      _ ->
        articles ++ get_articles_cyclic(page + 1, auth_header)
    end
  end

  @doc """
  記事一覧を全件取得する

  ## パラメータ

    - page: ページ番号
    - auth_header: 認証ヘッダー

  """
  @spec get_all_articles(tuple) :: list
  def get_all_articles(auth_header) do
    get_articles_cyclic(1, auth_header)
  end
end
# 1ページ分取得
Qiita.get_articles(1, auth_header)
# 全件取得
all_articles = Qiita.get_all_articles(auth_header)
Enum.count(all_articles)

記事一覧をデータフレーム化する

qiita_df =
  all_articles
  |> Enum.map(fn item ->
    %{
      "title" => item["title"],
      # 限定公開フラグ
      "private" => item["private"],
      # 作成日 日付は NaiveDateTime に変換する
      "created_at" => NaiveDateTime.from_iso8601!(item["created_at"]),
      # 閲覧数
      "page_views_count" => item["page_views_count"],
      # いいね数
      "likes_count" => item["likes_count"],
      # いいね率 = いいね数 / 閲覧数
      "likes_rate" => item["likes_count"] / item["page_views_count"],
      # ストック数
      "stocks_count" => item["stocks_count"],
      # ストック率 = ストック数 / 閲覧数
      "stocks_rate" => item["stocks_count"] / item["page_views_count"],
      # タグ 複数のため、 `、` で結合する
      "tags" => item["tags"] |> Enum.map(& &1["name"]) |> Enum.join(","),
      # 記事の長さ(文字数)
      "length" => item["body"] |> String.length()
    }
  end)
  |> DataFrame.new()
  |> DataFrame.select([
    "title",
    "private",
    "created_at",
    "page_views_count",
    "likes_count",
    "likes_rate",
    "stocks_count",
    "stocks_rate",
    "tags",
    "length"
  ])

Kino.DataTable.new(qiita_df)

記事一覧を分析する

qiita_df
|> DataFrame.filter(private == false)
|> DataFrame.select(["page_views_count", "likes_count", "stocks_count"])
|> DataFrame.describe()
|> Kino.DataTable.new()
qiita_df
|> DataFrame.sort_by(desc: page_views_count)
|> DataFrame.select(["title", "page_views_count", "likes_count", "stocks_count"])
|> Kino.DataTable.new()
qiita_df
|> DataFrame.sort_by(desc: likes_count)
|> DataFrame.select(["title", "likes_count", "page_views_count", "stocks_count"])
|> Kino.DataTable.new()
qiita_df
|> DataFrame.sort_by(desc: stocks_count)
|> DataFrame.select(["title", "stocks_count", "likes_count", "page_views_count"])
|> Kino.DataTable.new()
qiita_df
|> DataFrame.sort_by(desc: likes_rate)
|> DataFrame.select(["title", "likes_rate", "likes_count", "page_views_count"])
|> Kino.DataTable.new()
qiita_df
|> DataFrame.sort_by(desc: stocks_rate)
|> DataFrame.select(["title", "stocks_rate", "stocks_count", "page_views_count"])
|> Kino.DataTable.new()

グラフ化する

get_values = fn df, col ->
  df
  |> DataFrame.pull(col)
  |> Series.to_list()
end
x = get_values.(qiita_df, "title")
y = get_values.(qiita_df, "page_views_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: x, y: y)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(
  :x,
  "x",
  type: :nominal,
  title: "title",
  # 閲覧数の降順に並べる
  sort: %{"field" => "y", "order" => "descending"}
)
|> VegaLite.encode_field(
  :y,
  "y",
  type: :quantitative,
  title: "page_views_count"
)
x = get_values.(qiita_df, "title")
y = get_values.(qiita_df, "likes_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: x, y: y)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(
  :x,
  "x",
  type: :nominal,
  title: "title",
  sort: %{"field" => "y", "order" => "descending"}
)
|> VegaLite.encode_field(
  :y,
  "y",
  type: :quantitative,
  title: "likes_count"
)
x = get_values.(qiita_df, "created_at")
y = get_values.(qiita_df, "page_views_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: x, y: y)
|> VegaLite.mark(:line)
|> VegaLite.encode_field(
  :x,
  "x",
  type: :temporal,
  title: "created_at"
)
|> VegaLite.encode_field(
  :y,
  "y",
  type: :quantitative,
  title: "page_views_count"
)
x = get_values.(qiita_df, "length")
y = get_values.(qiita_df, "stocks_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: x, y: y)
|> VegaLite.mark(:line)
|> VegaLite.encode_field(
  :x,
  "x",
  type: :quantitative,
  title: "length"
)
|> VegaLite.encode_field(
  :y,
  "y",
  type: :quantitative,
  title: "likes_count"
)

タグを分析する

qiita_tag_df =
  all_articles
  |> Enum.flat_map(fn item ->
    item["tags"]
    |> Enum.map(fn tag ->
      %{
        "tag" => tag["name"],
        "title" => item["title"],
        "page_views_count" => item["page_views_count"],
        "likes_count" => item["likes_count"],
        "stocks_count" => item["stocks_count"]
      }
    end)
  end)
  |> DataFrame.new()
  |> DataFrame.select(["title", "tag", "page_views_count", "likes_count", "stocks_count"])

Kino.DataTable.new(qiita_tag_df)
qiita_tag_summarised_df =
  qiita_tag_df
  |> DataFrame.group_by(["tag"])
  |> DataFrame.summarise(
    articles_count: count(page_views_count),
    page_views_count_sum: sum(page_views_count),
    page_views_count_mean: mean(page_views_count),
    likes_count_sum: sum(likes_count),
    likes_count_mean: mean(likes_count),
    stocks_count_sum: sum(stocks_count),
    stocks_count_mean: mean(stocks_count)
  )
  |> DataFrame.sort_by(desc: page_views_count_sum)

Kino.DataTable.new(qiita_tag_summarised_df)
x = get_values.(qiita_tag_summarised_df, "tag")
y = get_values.(qiita_tag_summarised_df, "articles_count")

VegaLite.new(width: 800, height: 400)
|> VegaLite.data_from_values(x: x, y: y)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(
  :x,
  "x",
  type: :nominal,
  title: "tag",
  sort: %{"field" => "y", "order" => "descending"}
)
|> VegaLite.encode_field(
  :y,
  "y",
  type: :quantitative,
  title: "count"
)
plot_tag_bar = fn col, agg ->
  x = get_values.(qiita_tag_summarised_df, "tag")
  y = get_values.(qiita_tag_summarised_df, "#{col}_#{agg}")

  VegaLite.new(width: 800, height: 400)
  |> VegaLite.data_from_values(x: x, y: y)
  |> VegaLite.mark(:bar)
  |> VegaLite.encode_field(
    :x,
    "x",
    type: :nominal,
    title: "tag",
    sort: %{"field" => "y", "order" => "descending"}
  )
  |> VegaLite.encode_field(
    :y,
    "y",
    type: :quantitative,
    title: "#{col}_#{agg}"
  )
end
plot_tag_bar.("page_views_count", "sum")
plot_tag_bar.("page_views_count", "mean")
plot_tag_bar.("likes_count", "sum")
plot_tag_bar.("likes_count", "mean")
plot_tag_bar.("stocks_count", "sum")
plot_tag_bar.("stocks_count", "mean")