# Untitled notebook
# Notebook dependencies, fetched and compiled by Mix.install/1.
# req is used for the HTTP download, explorer for the data frames and
# kino_vega_lite for the charts further down; nx is not referenced directly
# in the visible cells.
Mix.install([
{:req, "~> 0.3.6"},
{:explorer, "~> 0.5.6"},
{:kino_explorer, "~> 0.1.4"},
{:nx, "~> 0.5.1"},
# NOTE(review): kino_vega_lite is loaded from an absolute path on one machine,
# so this entry only resolves where that checkout exists. The commented-out
# Hex requirement below is the portable alternative.
{:kino_vega_lite, path: "/home/kuku/Projects/kino_vega_lite"}
# {:kino_vega_lite, "~> 0.1.7"}
])
## Section
require Explorer.DataFrame, as: DF
alias Explorer.Series

# Download the Titanic CSV. The URL embeds a revision hash, so the notebook
# always reads the same version of the gist.
response =
  Req.get!(
    "https://gist.githubusercontent.com/fyyying/4aa5b471860321d7b47fd881898162b7/raw/6907bb3a38bfbb6fccf3a8b1edfb90e39714d14f/titanic_dataset.csv"
  )

# Req.get!/1 raises on transport failures only; a 404/500 still comes back as a
# regular response. Match on the status so an error page is never handed to the
# CSV parser as if it were data.
%Req.Response{status: 200} = response

# Parse the response body (CSV text) into an Explorer data frame; raises if the
# body cannot be parsed.
titanic = DF.load_csv!(response.body)
# Turns a column header into a lowercase, underscore-separated name.
#
# Steps, in order:
#   1. lowercase the header,
#   2. turn every whitespace character into a plain space,
#   3. drop everything that is neither an ASCII letter nor whitespace
#      (digits and punctuation disappear),
#   4. replace each space with an underscore.
#
# Runs of whitespace are not collapsed, so "a  b" becomes "a__b".
tidy_names = fn name ->
  lowered = String.downcase(name)
  spaced = String.replace(lowered, ~r/\s/, " ")
  letters_only = String.replace(spaced, ~r/[^A-Za-z\s]/, "")

  String.replace(letters_only, " ", "_")
end
# Extracts the honorific from a passenger name such as "Braund, Mr. Owen Harris"
# and folds it into one of "Mr", "Mrs", "Miss", "Master" or "Other".
#
# The title is the first run of ASCII letters that follows a space and is
# terminated by a period. Variants are mapped onto the common forms
# ("Mme" -> "Mrs", "Mlle"/"Ms" -> "Miss"); any other title becomes "Other".
#
# `name` must be a binary. Names with no recognisable title also yield "Other".
get_title = fn name ->
  case Regex.run(~r' ([A-Za-z]+)\.', name) do
    [_, "Mme"] ->
      "Mrs"

    [_, title] when title in ~w[Mlle Ms] ->
      "Miss"

    [_, title] when title in ~w[Miss Mrs Mr Master] ->
      title

    # Covers both an unlisted title and no match at all. Regex.run/2 returns nil
    # when nothing matches, which previously raised CaseClauseError and would
    # abort a whole-column transform on a single odd name.
    _ ->
      "Other"
  end
end
# Feature engineering on the raw Titanic frame.
#
# Column names are first normalised with `tidy_names`, then these columns are
# added or overwritten:
#   * named_cabin?    - true where `cabin` is present
#   * female?         - true where `sex` equals "female"
#   * embarked        - missing values filled with the :max strategy, cast to category
#   * age_category    - age (missing filled with the mean) divided by 10, cast to integer
#   * normalized_fare - log(fare + 1) scaled by its maximum
#   * title           - honorific derived from the raw "Name" column via `get_title`
modified =
  titanic
  |> DF.rename_with(tidy_names)
  |> DF.mutate(%{
    named_cabin?: not is_nil(cabin),
    # A `?` column should hold booleans; this used to be a category copy of `sex`.
    # NOTE(review): assumes the raw values are "male"/"female" - confirm against the CSV.
    female?: sex == "female",
    embarked: cast(Series.fill_missing(embarked, :max), :category),
    age_category: cast(divide(Series.fill_missing(age, :mean), 10), :integer),
    # Shift by one before taking the log: log(0) is -infinity, so any zero fare
    # would otherwise produce a non-finite value that cannot be plotted.
    normalized_fare: log(fare + 1) / max(log(fare + 1))
  })
  # Titles are computed from the original frame because the renamed frame has
  # `name`, not "Name"; row order is unchanged by the steps above.
  |> DF.put(:title, Series.cast(Series.transform(titanic["Name"], get_title), :category))
# Show the engineered frame side by side with the dummy columns generated from
# `title`. The result is not bound to a name, so `modified` itself is unchanged.
DF.concat_columns(modified, DF.dummies(modified, "title"))
alias VegaLite, as: Vl
# Scatter-plot matrix over fare, age_category, parch and survived: one point
# chart per (row, column) pair, with points coloured by `survived`.
Vl.new(width: 2000)
|> Vl.data_from_values(modified, only: ~w[fare age_category parch survived])
|> Vl.repeat(
  [
    row: ~w[fare age_category parch survived],
    column: ~w[fare age_category parch survived]
  ],
  # Template view: it is instantiated once for every combination of the :row
  # and :column fields listed above, which supply the :y and :x channels.
  Vl.new()
  |> Vl.mark(:point)
  |> Vl.encode_repeat(:y, :row, type: :quantitative)
  |> Vl.encode_repeat(:x, :column, type: :quantitative)
  |> Vl.encode_field(:color, "survived", type: :quantitative)
)
# Scatter plot of normalized_fare (x) against age (y). Colour encodes `survived`
# as a binned quantitative field (at most 5 bins) using the "pastel1" scheme.
Vl.new(width: 1000)
|> Vl.data_from_values(modified, only: ~w[normalized_fare age survived])
|> Vl.mark(:point)
|> Vl.encode_field(:y, "age", type: :quantitative)
|> Vl.encode_field(:x, "normalized_fare", type: :quantitative)
|> Vl.encode_field(
  :color,
  "survived",
  scale: [scheme: "pastel1"],
  bin: [maxbins: 5],
  type: :quantitative
)
# Scratch experiments with Explorer constructors, unrelated to the Titanic analysis.

# One-column frame of strings, and that column pulled out as a series.
a = DF.new(a: ~w[1 2 6])
b = DF.pull(a, "a")

# NOTE(review): a series built from nested lists with mixed integer/float
# elements may not be accepted by the pinned Explorer (~> 0.5.6) - confirm.
c = Series.from_list([[1, 2], [3.4]])

# One-column frame of integers; as the last expression it is the cell's output.
DF.new(a: [1, 2, 3])