Sponsored by AppSignal
Would you like to see your link here? Contact us
Notesclub

J-Archive

j_archive_exploration.livemd

J-Archive

Mix.install([
  {:jason, "~> 1.4"},
  {:req, "~> 0.3.11"},
  {:floki, "~> 0.34.3"}
])

Experimentation

Get Season IDs

# Shared Req client; request URLs given to it are relative to the J-Archive base URL.
req = Req.new(base_url: "https://j-archive.com")

# Fetch the season index, asserting a 200 status while binding the HTML body.
seasons_response = Req.get!(req, url: "listseasons.php")
%{status: 200, body: seasons_html} = seasons_response

# Strips the link prefix to leave the season id. An href without this prefix
# matches no clause, so the call raises instead of being skipped silently.
season_id_from_href = fn "showseason.php?season=" <> season_id -> season_id end

season_links =
  seasons_html
  |> Floki.parse_document!()
  |> Floki.attribute("#content a", "href")

season_ids = Enum.map(season_links, season_id_from_href)

For each season, get all game IDs.

# Downloads the page for `season_id` and returns the game ids (as strings)
# found in the links of the first column of the season table.
get_game_ids = fn season_id ->
  Req.get!(req, url: "showseason.php", params: [season: season_id]).body
  |> Floki.parse_document!()
  |> Floki.attribute("#content table td:first-child a", "href")
  |> Enum.map(fn href ->
    # Debug trace of every link that is visited.
    IO.puts(href)

    # The dot is escaped so that only a literal "showgame.php" matches (a bare
    # `.` matches any character). An href without a numeric game_id makes
    # Regex.run/2 return nil and the match below raise.
    [_, id] = Regex.run(~r/showgame\.php\?game_id=([0-9]+)/, href)
    id
  end)
end

# Try it out on season 1.
get_game_ids.(1)
# Extracts one category (1-based `category_idx`) of the given round from a
# parsed game document.
#
# `round` must be :jeopardy or :double_jeopardy; any other value raises a
# CaseClauseError. Returns %{category: name, clues: [...]}, where each clue is
# %{clue: text, answer: text, value: integer, category: name}.
#
# `value` is derived from the clue's row (100 * row, doubled for Double
# Jeopardy); it is not read from the page.
parse_round_category = fn html, round, category_idx ->
  {round_selector, round_abbrev, dollar_multiplier} =
    case round do
      :jeopardy -> {"#jeopardy_round", "J", 1}
      :double_jeopardy -> {"#double_jeopardy_round", "DJ", 2}
    end

  html = Floki.find(html, round_selector)
  category_name = Floki.find(html, ".category_name") |> Enum.at(category_idx - 1) |> Floki.text()

  clues =
    # A category holds five clues (rows 1..5), matching JArchive.Parser below.
    # Iterating up to 6 produced an extra clue with empty text and a bogus value.
    Enum.map(1..5, fn clue_idx ->
      clue_selector = "#clue_#{round_abbrev}_#{category_idx}_#{clue_idx}"
      answer_selector = "#clue_#{round_abbrev}_#{category_idx}_#{clue_idx}_r .correct_response"

      %{
        clue: Floki.find(html, clue_selector) |> Floki.text(),
        answer: Floki.find(html, answer_selector) |> Floki.text(),
        value: dollar_multiplier * 100 * clue_idx,
        category: category_name
      }
    end)

  %{category: category_name, clues: clues}
end
# Downloads the page of `game_id` with the shared `req` client and parses it
# into a map with the keys :jeopardy, :double_jeopardy, :final_jeopardy and
# :categories.
#
# The two regular rounds are lists of six categories as returned by
# `parse_round_category`; :categories repeats only the category names.
download_and_parse_game = fn game_id ->
  html =
    Req.get!(req, url: "showgame.php", params: [game_id: game_id]).body
    |> Floki.parse_document!()

  # The final category name is needed both under :final_jeopardy and :categories.
  final_category = Floki.find(html, "#final_jeopardy_round .category_name") |> Floki.text()

  %{
    jeopardy: Enum.map(1..6, &parse_round_category.(html, :jeopardy, &1)),
    double_jeopardy: Enum.map(1..6, &parse_round_category.(html, :double_jeopardy, &1)),
    final_jeopardy: %{
      category: final_category,
      clue: Floki.find(html, "#clue_FJ") |> Floki.text(),
      answer: Floki.find(html, "#clue_FJ_r .correct_response") |> Floki.text()
    },
    categories: %{
      jeopardy: Floki.find(html, "#jeopardy_round .category_name") |> Enum.map(&Floki.text/1),
      double_jeopardy:
        Floki.find(html, "#double_jeopardy_round .category_name") |> Enum.map(&Floki.text/1),
      final_jeopardy: final_category
    }
  }
end
# Download and parse the page of game 7620 to experiment with selectors.
html =
  Req.get!(req, url: "showgame.php", params: [game_id: 7620]).body
  |> Floki.parse_document!()

# Category names of the Double Jeopardy round.
Floki.find(html, "#double_jeopardy_round .category_name") |> Enum.map(&Floki.text/1)

# Full parse of the same game through the helper defined above.
download_and_parse_game.(7620)

Implementation

defmodule JArchive.Parser do
  @moduledoc """
  Extracts structured game data from a parsed J-Archive `showgame.php` page.

  Every function expects the document in the form returned by
  `Floki.parse_document!/1`.
  """

  # Board dimensions used when walking the two regular rounds.
  @categories_per_round 6
  @clues_per_category 5

  @doc """
  Parses a whole game page.

  Returns a map with the keys:

    * `:jeopardy` / `:double_jeopardy` - six categories each, as returned by
      `parse_round_category/3`
    * `:final_jeopardy` - `%{category: _, clue: _, answer: _}`
    * `:categories` - just the category names of the three rounds
    * `:air_date` - see `parse_air_date/1`
    * `:contestants` - text of the links inside `#contestants .contestants`
    * `:comments` - text of `#game_comments`

  Elements that are not found yield `""` (or `[]` for lists).
  """
  @spec parse_game(Floki.html_tree()) :: map()
  def parse_game(html) do
    # Needed both under :final_jeopardy and under :categories.
    final_category = text_at(html, "#final_jeopardy_round .category_name")

    %{
      jeopardy: parse_round(html, :jeopardy),
      double_jeopardy: parse_round(html, :double_jeopardy),
      final_jeopardy: %{
        category: final_category,
        clue: text_at(html, "#clue_FJ"),
        answer: text_at(html, "#clue_FJ_r .correct_response")
      },
      categories: %{
        jeopardy: texts_at(html, "#jeopardy_round .category_name"),
        double_jeopardy: texts_at(html, "#double_jeopardy_round .category_name"),
        final_jeopardy: final_category
      },
      air_date: parse_air_date(html),
      contestants: texts_at(html, "#contestants .contestants a"),
      comments: text_at(html, "#game_comments")
    }
  end

  @doc """
  Parses the category at 1-based position `category_idx` of `round`.

  `round` is `:jeopardy` or `:double_jeopardy`; any other value raises a
  `CaseClauseError`.

  Returns `%{category: name, clues: clues}` with five clues, each being
  `%{clue: text, answer: text, value: integer, category: name}`. `value` is
  derived from the clue's row (`100 * row`, doubled for Double Jeopardy); it is
  not read from the page.
  """
  @spec parse_round_category(Floki.html_tree(), :jeopardy | :double_jeopardy, pos_integer()) ::
          %{category: String.t(), clues: [map()]}
  def parse_round_category(entire_game_html, round, category_idx) do
    {round_selector, round_abbrev, dollar_multiplier} =
      case round do
        :jeopardy -> {"#jeopardy_round", "J", 1}
        :double_jeopardy -> {"#double_jeopardy_round", "DJ", 2}
      end

    html = Floki.find(entire_game_html, round_selector)

    category_name =
      html
      |> Floki.find(".category_name")
      |> Enum.at(category_idx - 1)
      # Enum.at/2 returns nil when the category is absent; wrapping turns that
      # into [] so a missing category yields "" like every other missing field
      # instead of handing nil to Floki.text/1.
      |> List.wrap()
      |> Floki.text()

    clues =
      Enum.map(1..@clues_per_category, fn clue_idx ->
        clue_selector = "#clue_#{round_abbrev}_#{category_idx}_#{clue_idx}"
        answer_selector = "#{clue_selector}_r .correct_response"

        %{
          clue: text_at(html, clue_selector),
          answer: text_at(html, answer_selector),
          value: dollar_multiplier * 100 * clue_idx,
          category: category_name
        }
      end)

    %{category: category_name, clues: clues}
  end

  @doc """
  Reads the air date from the page title.

  The title must end in `aired YYYY-M-D`. Returns a `Date`, or `nil` when the
  title does not end that way or `Date.from_iso8601/1` rejects the captured
  string.
  """
  @spec parse_air_date(Floki.html_tree()) :: Date.t() | nil
  def parse_air_date(html) do
    title = text_at(html, "head title")

    with [_, date_str] <- Regex.run(~r/aired (\d\d\d\d-\d\d?-\d\d?)$/, title),
         {:ok, air_date} <- Date.from_iso8601(date_str) do
      air_date
    else
      _ -> nil
    end
  end

  # Parses every category of a regular round.
  defp parse_round(html, round) do
    Enum.map(1..@categories_per_round, &parse_round_category(html, round, &1))
  end

  # Concatenated text of everything matching `selector`.
  defp text_at(html, selector), do: html |> Floki.find(selector) |> Floki.text()

  # One text per element matching `selector`.
  defp texts_at(html, selector), do: html |> Floki.find(selector) |> Enum.map(&Floki.text/1)
end
defmodule JArchive do
  @moduledoc """
  Convenience entry point that downloads a game page from J-Archive and parses it.
  """

  # Module attributes are evaluated at compile time, so the client is built once
  # and reused by every call.
  @req Req.new(base_url: "https://j-archive.com")

  @doc """
  Downloads `showgame.php` for `game_id` and returns the result of
  `JArchive.Parser.parse_game/1` applied to the parsed document.

  Raises if the request or the HTML parsing fails (both calls are bang variants).
  """
  def download_and_parse_game(game_id) do
    response = Req.get!(@req, url: "showgame.php", params: [game_id: game_id])
    document = Floki.parse_document!(response.body)

    JArchive.Parser.parse_game(document)
  end
end
# Fetch game 38 with a one-off request (full URL, no shared client) and parse the HTML.
game_response = Req.get!("https://j-archive.com/showgame.php", params: [game_id: 38])
html = Floki.parse_document!(game_response.body)

# Only the contestants of the parsed game.
html |> JArchive.Parser.parse_game() |> Map.fetch!(:contestants)

# The same game through the download-and-parse entry point.
JArchive.download_and_parse_game(38)