Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Untitled notebook

titanic.livemd

Untitled notebook

# Notebook dependencies, installed when the notebook's setup cell runs.
Mix.install([
  {:req, "~> 0.3.6"},
  {:explorer, "~> 0.5.6"},
  {:kino_explorer, "~> 0.1.4"},
  {:nx, "~> 0.5.1"},
  # Absolute path to a local checkout of kino_vega_lite: this only resolves on a
  # machine that has that directory. To run the notebook elsewhere, swap this
  # entry for the commented-out Hex release below.
  {:kino_vega_lite, path: "/home/kuku/Projects/kino_vega_lite"}
  # {:kino_vega_lite, "~> 0.1.7"}
])

Section

require Explorer.DataFrame, as: DF
alias Explorer.Series

# Download the Titanic CSV from a gist URL pinned to a specific revision hash,
# then parse the response body into a data frame.
titanic_csv_url =
  "https://gist.githubusercontent.com/fyyying/4aa5b471860321d7b47fd881898162b7/raw/6907bb3a38bfbb6fccf3a8b1edfb90e39714d14f/titanic_dataset.csv"

response = Req.get!(titanic_csv_url)
titanic = DF.load_csv!(response.body)
# Normalises a column name into a lowercase, underscore-separated identifier:
#   1. lowercase everything,
#   2. turn every whitespace character into a plain space,
#   3. drop anything that is neither a letter nor whitespace,
#   4. replace each remaining space with an underscore.
tidy_names = fn name ->
  lowered = String.downcase(name)
  single_spaced = String.replace(lowered, ~r/\s/, " ")
  letters_only = String.replace(single_spaced, ~r/[^A-Za-z\s]/, "")
  String.replace(letters_only, " ", "_")
end
# Extracts the title from a passenger name and buckets it.
#
# The title is the first run of ASCII letters that is preceded by a space and
# followed by a dot (e.g. " Mr." in "Braund, Mr. Owen Harris").
#
# Returns one of "Mr", "Mrs", "Miss", "Master" or "Other":
#   * "Mme" is folded into "Mrs"; "Mlle" and "Ms" are folded into "Miss";
#   * any other title, and any name in which no title can be found, is "Other".
get_title = fn name ->
  case Regex.run(~r' ([A-Za-z]+)\.', name) do
    [_, "Mme"] -> "Mrs"
    [_, title] when title in ~w[Mlle Ms] -> "Miss"
    [_, title] when title in ~w[Miss Mrs Mr Master] -> title
    # Catch-all: rare titles, plus the `nil` that Regex.run/2 returns when the
    # name has no "word." token (this used to raise CaseClauseError).
    _ -> "Other"
  end
end
# Feature-engineering pass over the raw Titanic frame.
#
# Column names are first normalised with `tidy_names`, then these columns are
# added or replaced:
#   * named_cabin?    - true when `cabin` is not nil
#   * female?         - true when `sex` is "female"
#   * embarked        - missing values filled with the :max strategy, cast to category
#   * age_category    - age (missing values filled with the mean) divided by 10,
#                       cast to integer
#   * normalized_fare - log(fare + 1) divided by its column maximum
#   * title           - title bucket from `get_title`, cast to category
#
# The mutations are kept in a map (not a keyword list) so the resulting column
# order stays the same as before.
modified =
  titanic
  |> DF.rename_with(tidy_names)
  |> DF.mutate(%{
    named_cabin?: not is_nil(cabin),
    # A real boolean, as the `?` suffix promises. This used to be
    # `cast(sex, :category)`, i.e. just a categorical copy of `sex`.
    # NOTE(review): assumes `sex` holds the literal "female" - confirm against the CSV.
    female?: sex == "female",
    embarked: cast(Series.fill_missing(embarked, :max), :category),
    age_category: cast(divide(Series.fill_missing(age, :mean), 10), :integer),
    # Shift by 1 before taking the log: log(0) is -infinity, so any zero fare
    # would otherwise produce a non-finite "normalized" value.
    normalized_fare: log(fare + 1) / max(log(fare + 1))
  })
  # Titles come from the original "Name" column of `titanic`; `modified` has the
  # same rows, so the new series is put on it directly.
  |> DF.put(:title, Series.cast(Series.transform(titanic["Name"], get_title), :category))

# Display `modified` side by side with the dummy columns generated from its
# "title" column. The combined frame is only the cell result; it is not bound.
title_dummies = DF.dummies(modified, "title")
DF.concat_columns(modified, title_dummies)
alias VegaLite, as: Vl

# Scatter-plot matrix over four columns of `modified`.
pair_fields = ["fare", "age_category", "parch", "survived"]

# Single-panel spec. Vl.repeat/3 below instantiates it once for every
# (row, column) combination, binding :x to the column field and :y to the row
# field; points are coloured by "survived".
pair_panel =
  Vl.new()
  |> Vl.mark(:point)
  |> Vl.encode_repeat(:x, :column, type: :quantitative)
  |> Vl.encode_repeat(:y, :row, type: :quantitative)
  |> Vl.encode_field(:color, "survived", type: :quantitative)

Vl.new(width: 2000)
|> Vl.data_from_values(modified, only: pair_fields)
|> Vl.repeat([row: pair_fields, column: pair_fields], pair_panel)
# Scatter plot of normalized fare (x) against age (y), coloured by "survived".
# The colour channel is binned (at most 5 bins) and drawn with the "pastel1" scheme.
survived_color_opts = [
  type: :quantitative,
  bin: [maxbins: 5],
  scale: [scheme: "pastel1"]
]

Vl.new(width: 1000)
|> Vl.data_from_values(modified, only: ["normalized_fare", "age", "survived"])
|> Vl.mark(:point)
|> Vl.encode_field(:x, "normalized_fare", type: :quantitative)
|> Vl.encode_field(:y, "age", type: :quantitative)
|> Vl.encode_field(:color, "survived", survived_color_opts)
# Scratch cells: small experiments, unrelated to the Titanic analysis above.

# One-column data frame whose column "a" holds strings.
a = DF.new(a: ["1", "2", "6"])
# Look up column "a" of that frame via Access syntax.
b = a["a"]
# Series built from a nested list that mixes integers and a float.
# NOTE(review): list-typed series may not be supported by the pinned
# explorer "~> 0.5.6"; this line may raise - confirm.
c = Series.from_list([[1, 2], [3.4]])
# One-column data frame with integer values; not bound to a name.
DF.new(a: [1, 2, 3])