ElixirZone: Kino, Nx, Explorer, and VegaLite
Resource Links
Nx:
Kino:
Explorer (DataFrames for Elixir):
- https://github.com/elixir-nx/explorer
- https://raw.githubusercontent.com/elixir-nx/explorer/main/notebooks/exploring_explorer.livemd
Vega & VegaLite:
- https://hexdocs.pm/vega_lite/VegaLite.html
- https://github.com/livebook-dev/vega_lite
- https://vega.github.io/
- https://vega.github.io/vega-lite/examples/
- https://vega.github.io/vega-lite/tutorials/getting_started.html
- https://vega.github.io/vega-lite/tutorials/explore.html
Vega-Lite:
Screenshots of technologies we’ll play with
More established data analytics platforms
Cool related stuff I won’t cover today
Let’s get started!
Mix.install([
{:vega_lite, "~> 0.1.3"},
{:kino, "~> 0.5.0"},
{:jason, "~> 1.2"},
{:explorer, "~> 0.1.0-dev", github: "elixir-nx/explorer", branch: "main"},
{:ecto, "~> 3.7.1"}
])
Nx
# A 3x2 integer tensor reused by the Nx examples that follow.
t = Nx.tensor([[1, 2], [3, 4], [5, 6]])
Nx.to_heatmap(t)
Nx.shape(t)
Nx.exp(t)
# The same exponentials computed with Erlang's :math, as a nested list, for comparison.
Enum.map([[1, 2], [3, 4], [5, 6]], fn row -> Enum.map(row, &:math.exp/1) end)
Nx.to_heatmap(Nx.exp(t))
# Every exponential divided by the sum of all of them.
exp_t = Nx.exp(t)
Nx.divide(exp_t, Nx.sum(exp_t))
Kino.Input
# Text input widget; the greeting below reads whatever it currently holds.
who = Kino.Input.text("What is your name?")
entered_name = Kino.Input.read(who)
"Hello, #{entered_name}"
Kino.ETS
# Named, ordered table so every later expression can address it as :my_ets.
:ets.new(:my_ets, [:ordered_set, :named_table])
:ets.insert(:my_ets, [{"555-1234", "John Smith"}, {"555-4321", "Darius Jones"}])
Kino.ETS.new(:my_ets)
# Rows inserted after the table viewer was created.
:ets.insert(:my_ets, [{"867-5309", "Jenny"}, {"867-1111", "Bob"}])
Kino.Ecto
defmodule Muppet do
  @moduledoc """
  Embedded schema for a Muppet, used to demonstrate `Kino.Ecto` without a database.
  """

  use Ecto.Schema
  import Ecto.Changeset

  @primary_key false
  embedded_schema do
    # `null: false` is a migration column option rather than a `field/3` option,
    # so it is not passed here; `changeset/2` requires `:name` instead.
    field(:name, :string)
    field(:puppeteer, :string)
    field(:catchphrase, :string)
    # field(:admin, :boolean)
  end

  @doc """
  Builds a muppet from `attrs`, starting from an empty `%Muppet{}`.

  See `changeset/2` for the return value.
  """
  def new(attrs) do
    changeset(nil, attrs)
  end

  @doc """
  Casts `:name`, `:puppeteer` and `:catchphrase` from `attrs` onto `maybe_muppet`
  and requires `:name`.

  Anything that is not a `%Muppet{}` is replaced by an empty `%Muppet{}` first.
  Returns the updated `%Muppet{}` when the changeset is valid, otherwise the
  invalid changeset itself.
  """
  def changeset(maybe_muppet, attrs) do
    validated =
      maybe_muppet
      |> ensure_muppet()
      |> cast(attrs, [:name, :puppeteer, :catchphrase])
      |> validate_required([:name])

    # `apply_changes/1` returns the struct itself (not an `{:ok, _}` tuple), so
    # branch on validity directly instead of pattern matching on its result.
    case validated do
      %{valid?: true} -> apply_changes(validated)
      _invalid -> validated
    end
  end

  defp ensure_muppet(%Muppet{} = muppet), do: muppet
  defp ensure_muppet(_not_muppet), do: %Muppet{}
end
# `:admin` is not among the fields cast by Muppet.changeset/2, so it is ignored.
kermit_attrs = %{
  name: "Kermit",
  puppeteer: "Jim Henson",
  catchphrase: "Hiyo!",
  admin: true
}

kermit = Muppet.new(kermit_attrs)
# Example inspired by https://akoutmos.com/post/ecto-repo-testing/
defmodule Muppet.Repo do
  @moduledoc """
  In-memory stand-in for an Ecto repo that always answers with a fixed list of
  muppets, so `Kino.Ecto` can be demonstrated without a database.
  """

  @doc """
  Returns the fixed list of muppets.

  Both `queryable` and `opts` are ignored: any queryable is answered as if it
  were `Muppet`.
  """
  def all(queryable, opts \\ [])

  def all(Muppet, _opts) do
    [
      %Muppet{
        name: "Kermit",
        puppeteer: "Jim Henson",
        catchphrase: "Hiyo!"
      },
      %Muppet{
        name: "Beaker",
        catchphrase: "Meep meep",
        puppeteer: "Richard Hunt"
      },
      %Muppet{name: "Grover", puppeteer: "Frank Oz", catchphrase: "It's SUP-er Grover!"}
    ]
  end

  def all(_queryable, opts), do: all(Muppet, opts)

  @doc """
  Returns the number of muppets for `aggregate(Muppet, :count)`.

  The count is derived from `all/2` so it cannot drift from the list above.
  """
  def aggregate(Muppet, :count, _opts \\ []) do
    length(all(Muppet))
  end
end
grover =
  Muppet.new(%{
    name: "Grover",
    puppeteer: "Frank Oz",
    catchphrase: "It's SUP-er Grover!"
  })

# Browse the Muppet schema through the stand-in repo.
Kino.Ecto.new(Muppet, Muppet.Repo)
Kino.JS
defmodule Kino.Mermaid do
  @moduledoc """
  Custom `Kino.JS` widget that renders a Mermaid graph definition as SVG.
  """

  use Kino.JS

  @doc """
  Creates the widget; `graph` is handed to the JavaScript `init` function as-is.
  """
  def new(graph), do: Kino.JS.new(__MODULE__, graph)

  # The script loads Mermaid from a CDN, renders `graph` under the id "graph1",
  # and places the resulting SVG into the widget's root element.
  asset "main.js" do
    """
    import "https://cdn.jsdelivr.net/npm/mermaid@8.13.3/dist/mermaid.min.js";
    mermaid.initialize({ startOnLoad: false });
    export function init(ctx, graph) {
    mermaid.render("graph1", graph, (svgSource, bindListeners) => {
    ctx.root.innerHTML = svgSource;
    bindListeners && bindListeners(ctx.root);
    });
    }
    """
  end
end
# Top-down graph: A branches to B and C, which both lead to D.
diamond_graph = """
graph TD;
A-->B;
A-->C;
B-->D;
C-->D;
"""

Kino.Mermaid.new(diamond_graph)
Explorer
alias Explorer.DataFrame
alias Explorer.Series
alias Explorer.Datasets
# Fossil fuels dataset bundled with Explorer; `ff` is reused by the calls below.
ff = Datasets.fossil_fuels()
# Sampling by row count, then by fraction.
DataFrame.sample(ff, 5)
DataFrame.sample(ff, 0.01)
# Picking rows by index and by offset/length.
DataFrame.take(ff, [300, 301, 302])
DataFrame.slice(ff, -5, 5)
# Column access: one name, then a list of names.
ff["solid_fuel"]
ff[["year", "country"]]
# Metadata about the frame.
DataFrame.names(ff)
DataFrame.dtypes(ff)
DataFrame.shape(ff)
{DataFrame.n_rows(ff), DataFrame.n_cols(ff)}
DataFrame.head(ff)
DataFrame.tail(ff, 8)
# Keep only the rows whose country equals the given name.
usa =
  DataFrame.filter(ff, fn df ->
    Series.equal(df["country"], "UNITED STATES OF AMERICA")
  end)

DataFrame.table(usa)
DataFrame.table(ff, 30)
iris = Datasets.iris()
# Iris data as a keyword list, later handed to Vl.data_from_series/2.
dat = Keyword.new(DataFrame.to_map(iris))
VegaLite
alias VegaLite, as: Vl
# Raw Vega-Lite JSON: horsepower vs. cylinders as points, offset vertically by a
# calculated random field. Both expressions below build a chart from this string.
jitter_json = """
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"description": "Shows the relationship between horsepower and the numbver of cylinders using point marks with random offset (jittering).",
"data": {"url": "https://vega.github.io/editor/data/cars.json"},
"transform": [{"calculate": "random()", "as": "random"}],
"height": {"step": 50},
"mark": "point",
"encoding": {
"x": {"field": "Horsepower", "type": "quantitative"},
"y": {"field": "Cylinders", "type": "ordinal"},
"yOffset": {"field": "random", "type": "quantitative"}
}
}
"""

Vl.from_json(jitter_json)
# |> Vl.to_spec()

# Same spec again, this time also printing the resulting value.
jitter_json
|> Vl.from_json()
|> IO.inspect()
The following example comes from Christopher Grainger at
# Filled points whose tooltip shows the underlying data row.
iris_point_opts = [filled: true, tooltip: %{content: "data"}]

# Petal length vs. petal width from `dat`, colored by species.
Vl.new(height: 600, width: 600)
|> Vl.data_from_series(dat)
|> Vl.mark(:point, iris_point_opts)
|> Vl.encode_field(:x, "petal_length", type: :quantitative)
|> Vl.encode_field(:y, "petal_width", type: :quantitative)
|> Vl.encode(:color, field: "species", type: :nominal)
# Both charts below plot the same two columns, each counting from 1 to 100.
iteration_scores = [iteration: 1..100, score: 1..100]

# Initialize the specification, optionally with some top-level properties
Vl.new(width: 400, height: 400)
# Specify data source for the graphic, see the data_from_* functions
|> Vl.data_from_series(iteration_scores)
# |> Vl.data_from_values([%{iteration: 1, score: 1}, ...])
# |> Vl.data_from_url("...")
# Pick a visual mark for the graphic
|> Vl.mark(:line)
# |> Vl.mark(:point, tooltip: true)
# Map data fields to visual properties of the mark, like position or shape
|> Vl.encode_field(:x, "iteration", type: :quantitative)
|> Vl.encode_field(:y, "score", type: :quantitative)

# Point version of the chart above, additionally coloring each point by its score.
Vl.new(width: 400, height: 400)
|> Vl.data_from_series(iteration_scores)
# |> Vl.mark(:line)
|> Vl.mark(:point, tooltip: true)
|> Vl.encode_field(:x, "iteration", type: :quantitative)
|> Vl.encode_field(:y, "score", type: :quantitative)
|> Vl.encode_field(:color, "score", type: :quantitative)
After recording this, I realized that I could/should have used Enum.shuffle/1
instead of creating an anonymous function. I had looked for such a function but somehow missed it. Could have been:
> random_values = fn -> 1..100 |> Enum.to_list() |> Enum.shuffle() end
# Returns the integers 1..100 in random order by sorting on a fresh random key
# per element. Uses :rand because Erlang's older :random module is deprecated.
random_values = fn ->
  Enum.sort_by(1..100, fn _num -> :rand.uniform() end)
end
# Ages 1..100 against randomly ordered scores, colored by score.
Vl.new(width: 400, height: 400)
|> Vl.data_from_series(age: 1..100, score: random_values.())
|> Vl.mark(:point, tooltip: true)
|> Vl.encode_field(:x, "age", type: :quantitative)
|> Vl.encode_field(:y, "score", type: :quantitative)
|> Vl.encode_field(:color, "score", type: :quantitative)

# Pair up two independently shuffled lists into %{age: _, score: _} points.
age_scores =
  for {age, score} <- Enum.zip(random_values.(), random_values.()) do
    %{age: age, score: score}
  end

# Render a live chart (seeded with its own random data) and keep a handle to it.
display =
  Vl.new(width: 400, height: 400)
  |> Vl.data_from_series(age: random_values.(), score: random_values.())
  |> Vl.mark(:point, tooltip: true)
  |> Vl.encode_field(:x, "age", type: :quantitative)
  |> Vl.encode_field(:y, "score", type: :quantitative)
  |> Vl.encode_field(:color, "score", type: :quantitative)
  |> Kino.VegaLite.new()
  |> Kino.render()

# Push the prepared points one at a time, pausing 100 ms between pushes.
Enum.each(age_scores, fn point ->
  Kino.VegaLite.push(display, point)
  Process.sleep(100)
end)
# Inline data: one score per row, with "Canada" appearing twice.
country_scores = [
  %{a: "Canada", score: 2},
  %{a: "United Kingdom", score: 7},
  %{a: "Canada", score: 6},
  %{a: "Latvia", score: 8}
]

# Point chart of score per country; the built value is also printed.
Vl.new()
|> Vl.data_from_values(country_scores)
|> Vl.mark(:point)
|> Vl.encode_field(:x, "a", type: :nominal)
|> Vl.encode_field(:y, "score", type: :quantitative)
|> tap(&IO.inspect/1)
# Raw Vega-Lite JSON: horsepower vs. miles per gallon as points.
cars_points_json = """
{
"data": { "url": "https://vega.github.io/editor/data/cars.json" },
"mark": "point",
"encoding": {
"x": { "field": "Horsepower", "type": "quantitative" },
"y": { "field": "Miles_per_Gallon", "type": "quantitative" }
}
}
"""

Vl.from_json(cars_points_json)
# https://vega.github.io/vega-lite/tutorials/getting_started.html
# Nine %{a: _, b: _} rows: three `b` values for each of the categories C, D and E.
tutorial_rows =
  for {category, numbers} <- [{"C", [2, 7, 4]}, {"D", [1, 2, 6]}, {"E", [8, 4, 7]}],
      number <- numbers do
    %{a: category, b: number}
  end

# Every row as a point; the built value is printed as well as returned.
Vl.new()
|> Vl.data_from_values(tutorial_rows)
|> Vl.mark(:point, tooltip: true)
|> Vl.encode_field(:x, "a", type: :nominal)
|> Vl.encode_field(:y, "b", type: :quantitative)
|> IO.inspect()

# Horizontal bars showing the mean of `b` for each category.
Vl.new()
|> Vl.data_from_values(tutorial_rows)
|> Vl.mark(:bar)
|> Vl.encode_field(:y, "a", type: :nominal)
|> Vl.encode_field(:x, "b", type: :quantitative, aggregate: :mean, title: "avg(b)")
# Every chart in this run reads the same Seattle weather CSV.
seattle_weather_csv = "https://vega.github.io/editor/data/seattle-weather.csv"

# Strip plot: one tick per precipitation value.
Vl.new()
|> Vl.data_from_url(seattle_weather_csv)
|> Vl.mark(:tick)
|> Vl.encode_field(:x, "precipitation",
  type: :quantitative,
  title: "Daily precipitation in Seattle"
)

# Histogram: binned precipitation on x, record count on y.
Vl.new()
|> Vl.data_from_url(seattle_weather_csv)
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "precipitation", bin: true, title: "Daily precipitation in Seattle")
|> Vl.encode(:y, aggregate: :count)

# Mean precipitation per month.
Vl.new()
|> Vl.data_from_url(seattle_weather_csv)
|> Vl.mark(:line)
|> Vl.encode_field(:x, "date", time_unit: :month)
|> Vl.encode_field(:y, "precipitation", aggregate: :mean)

# Maximum of temp_max per year-month.
Vl.new()
|> Vl.data_from_url(seattle_weather_csv)
|> Vl.mark(:line)
|> Vl.encode_field(:x, "date", time_unit: :yearmonth)
|> Vl.encode_field(:y, "temp_max", aggregate: :max)

# Mean of temp_max per year.
Vl.new()
|> Vl.data_from_url(seattle_weather_csv)
|> Vl.mark(:line)
|> Vl.encode_field(:x, "date", time_unit: :year)
|> Vl.encode_field(:y, "temp_max", aggregate: :mean)

# Mean of the calculated temp_range (temp_max - temp_min) per month.
Vl.new()
|> Vl.data_from_url(seattle_weather_csv)
|> Vl.transform(calculate: "datum.temp_max - datum.temp_min", as: "temp_range")
|> Vl.mark(:line)
|> Vl.encode_field(:x, "date", time_unit: :month)
|> Vl.encode_field(:y, "temp_range", aggregate: :mean)

# Record count per month, colored by weather type.
Vl.new()
|> Vl.data_from_url(seattle_weather_csv)
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "date", type: :ordinal, time_unit: :month)
|> Vl.encode(:y, aggregate: :count, type: :quantitative)
|> Vl.encode_field(:color, "weather", type: :nominal)

# Explicit color for each weather type, paired by position.
weather_color_scale = [
  domain: ["sun", "fog", "drizzle", "rain", "snow"],
  range: ["#e7ba52", "#c7c7c7", "#aec7e8", "#1f77b4", "#9467bd"]
]

# Same chart as above with axis/legend titles and the custom color scale.
Vl.new()
|> Vl.data_from_url(seattle_weather_csv)
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "date", type: :ordinal, time_unit: :month, title: "Month of the year")
|> Vl.encode(:y, aggregate: :count, type: :quantitative)
|> Vl.encode_field(:color, "weather",
  type: :nominal,
  title: "Weather type",
  scale: weather_color_scale
)
# Bars: mean precipitation per month.
monthly_precipitation_bars =
  Vl.new()
  |> Vl.mark(:bar)
  |> Vl.encode_field(:x, "date", time_unit: :month, type: :ordinal)
  |> Vl.encode_field(:y, "precipitation", aggregate: :mean)

# Points: binned temp_min vs. binned temp_max, sized by record count.
binned_temperature_points =
  Vl.new()
  |> Vl.mark(:point)
  |> Vl.encode_field(:x, "temp_min", bin: true)
  |> Vl.encode_field(:y, "temp_max", bin: true)
  |> Vl.encode(:size, aggregate: :count)

# Both views concatenated over the weather data filtered to Seattle rows.
Vl.new()
|> Vl.data_from_url("https://vega.github.io/editor/data/weather.csv")
|> Vl.transform(filter: "datum.location == 'Seattle'")
|> Vl.concat([monthly_precipitation_bars, binned_temperature_points])
# Source: https://vega.github.io/vega-lite/examples/stacked_area_normalize.html
normalized_area_json = """
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://vega.github.io/editor/data/unemployment-across-industries.json"},
"width": 300, "height": 200,
"mark": "area",
"encoding": {
"x": {
"timeUnit": "yearmonth", "field": "date",
"axis": {"domain": false, "format": "%Y"}
},
"y": {
"aggregate": "sum", "field": "count",
"axis": null,
"stack": "normalize"
},
"color": {"field":"series", "scale":{"scheme": "category20b"}}
}
}
"""

Vl.from_json(normalized_area_json)

# Raw Vega-Lite JSON: horsepower vs. miles per gallon as points.
cars_scatter_json = """
{
"data": { "url": "https://vega.github.io/editor/data/cars.json" },
"mark": "point",
"encoding": {
"x": { "field": "Horsepower", "type": "quantitative" },
"y": { "field": "Miles_per_Gallon", "type": "quantitative" }
}
}
"""

Vl.from_json(cars_scatter_json)
# https://vega.github.io/vega-lite/examples/point_2d.html
cars_chart_base = Vl.data_from_url(Vl.new(), "https://vega.github.io/editor/data/cars.json")

cars_chart_base
|> Vl.mark(:point, tooltip: true)
|> Vl.encode_field(:x, "Horsepower", type: :quantitative)
|> Vl.encode_field(:y, "Miles_per_Gallon", type: :quantitative)

# https://vega.github.io/vega-lite/examples/line.html
# Price over time, restricted to rows whose symbol is GOOG.
stocks_chart_base = Vl.data_from_url(Vl.new(), "https://vega.github.io/editor/data/stocks.csv")

stocks_chart_base
|> Vl.transform(filter: "datum.symbol==='GOOG'")
|> Vl.mark(:line)
|> Vl.encode_field(:x, "date", type: :temporal)
|> Vl.encode_field(:y, "price", type: :quantitative)
# Data exists for "MSFT" "GOOG" "IBM" "AAPL" and "AMZN"
stock_ticker = Kino.Input.text("Which stock ticker?")

# Normalize what was typed so " goog " still selects GOOG; the symbols listed
# above are all upper case.
selected_symbol =
  stock_ticker
  |> Kino.Input.read()
  |> String.trim()
  |> String.upcase()

# Price over time for the selected symbol.
Vl.new()
|> Vl.data_from_url("https://vega.github.io/editor/data/stocks.csv")
# A field predicate passes the symbol as data rather than splicing it into an
# expression string, so quotes in the input cannot break or alter the filter.
|> Vl.transform(filter: [field: "symbol", equal: selected_symbol])
|> Vl.mark(:line)
|> Vl.encode_field(:x, "date", type: :temporal)
|> Vl.encode_field(:y, "price", type: :quantitative)
# Template view: monthly mean of the repeated field as a line, colored by location.
monthly_mean_lines =
  Vl.new()
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "date", time_unit: "month")
  |> Vl.encode_repeat(:y, :repeat, aggregate: :mean, type: :quantitative)
  |> Vl.encode_field(:color, "location")

# The template is repeated once for each of the three listed fields.
Vl.new()
|> Vl.data_from_url("https://vega.github.io/editor/data/weather.csv")
|> Vl.repeat(["temp_max", "precipitation", "wind"], monthly_mean_lines)
# https://vega.github.io/vega-lite/examples/repeat_layer.html
# Vl.new()
# |> Vl.data_from_url("https://vega.github.io/editor/data/movies.json")
# |> Vl.repeat(
# ["US Gross", "Worldwide Gross"],
# Vl.new()
# |> Vl.mark(:line)
# |> Vl.encode_field(:x, "IMDB Rating", type: :quantitative, bin: true)
# |> Vl.encode_repeat(:y, :repeat, aggregate: :mean, type: :quantitative)
# |> Vl.encode_field(:color, :repeat, type: :nominal)
# )
# https://vega.github.io/vega-lite/docs/repeat.html#repeated-histogram-wrapped
# One binned histogram per listed field, laid out in two columns and colored by origin.
Vl.new(columns: 2)
|> Vl.data_from_url("https://vega.github.io/editor/data/cars.json")
|> Vl.repeat(
  ["Horsepower", "Miles_per_Gallon", "Acceleration", "Displacement"],
  Vl.new()
  |> Vl.mark(:bar)
  |> Vl.encode_repeat(:x, :repeat, bin: true)
  # Counting records needs no field, so y is a bare count aggregate (as in the
  # linked example and the other count encodings in this notebook).
  |> Vl.encode(:y, aggregate: :count)
  |> Vl.encode_field(:color, "Origin")
)
# https://vega.github.io/vega-lite/docs/repeat.html#scatterplot-matrix-splom
# Raw Vega-Lite JSON: every listed row field plotted against every listed column field.
penguin_splom_json = """
{
"data": {"url": "https://vega.github.io/editor/data/penguins.json"},
"repeat": {
"row": [
"Beak Length (mm)",
"Beak Depth (mm)",
"Flipper Length (mm)",
"Body Mass (g)"
],
"column": [
"Body Mass (g)",
"Flipper Length (mm)",
"Beak Depth (mm)",
"Beak Length (mm)"
]
},
"spec": {
"width": 150,
"height": 150,
"mark": "point",
"encoding": {
"x": {
"field": {"repeat": "column"},
"type": "quantitative",
"scale": {"zero": false}
},
"y": {
"field": {"repeat": "row"},
"type": "quantitative",
"scale": {"zero": false}
},
"color": {"field": "Species", "type": "nominal"}
}
}
}
"""

Vl.from_json(penguin_splom_json)
# https://vega.github.io/vega-lite/docs/repeat.html#scatterplot-matrix-splom
# Measurements used for the matrix rows; the columns use the same list reversed.
penguin_measures = [
  "Beak Length (mm)",
  "Beak Depth (mm)",
  "Flipper Length (mm)",
  "Body Mass (g)"
]

# One 150x150 cell: column field on x, row field on y, colored by species.
splom_cell =
  Vl.new(width: 150, height: 150)
  |> Vl.mark(:point)
  |> Vl.encode_repeat(:x, :column, type: :quantitative, scale: [zero: false])
  |> Vl.encode_repeat(:y, :row, type: :quantitative, scale: [zero: false])
  |> Vl.encode_field(:color, "Species", type: :nominal)

Vl.new()
|> Vl.data_from_url("https://vega.github.io/editor/data/penguins.json")
|> Vl.repeat([row: penguin_measures, column: Enum.reverse(penguin_measures)], splom_cell)
# Rect marks for origin (y) by cylinders (x), colored by mean horsepower.
# NOTE(review): originally attributed to
# https://vega.github.io/vega-lite/examples/area_gradient.html, but the spec looks
# like the table heatmap example (examples/rect_heatmap.html) - TODO confirm.
cars_heatmap_json = """
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://vega.github.io/editor/data/cars.json"},
"mark": "rect",
"encoding": {
"y": {"field": "Origin", "type": "nominal"},
"x": {"field": "Cylinders", "type": "ordinal"},
"color": {"aggregate": "mean", "field": "Horsepower"}
},
"config": {
"axis": {"grid": true, "tickBand": "extent"}
}
}
"""

Vl.from_json(cars_heatmap_json)
# https://vega.github.io/vega-lite/examples/arc_donut.html
# Raw Vega-Lite JSON: arc marks with an inner radius, fed by six embedded rows.
donut_json = """
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"description": "A simple donut chart with embedded data.",
"data": {
"values": [
{"category": 1, "value": 4},
{"category": 2, "value": 6},
{"category": 3, "value": 10},
{"category": 4, "value": 3},
{"category": 5, "value": 7},
{"category": 6, "value": 8}
]
},
"mark": {"type": "arc", "innerRadius": 50},
"encoding": {
"theta": {"field": "value", "type": "quantitative"},
"color": {"field": "category", "type": "nominal"}
}
}
"""

Vl.from_json(donut_json)
# https://vega.github.io/vega-lite/examples/stacked_bar_normalize.html
# Raw Vega-Lite JSON: year-2000 population per age as normalized stacked bars,
# colored by a calculated gender field.
population_share_json = """
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": { "url": "https://vega.github.io/editor/data/population.json"},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"}
],
"mark": "bar",
"width": {"step": 17},
"encoding": {
"y": {
"aggregate": "sum", "field": "people",
"title": "population",
"stack": "normalize"
},
"x": {"field": "age"},
"color": {
"field": "gender",
"scale": {"range": ["#675193", "#ca8861"]}
}
}
}
"""

Vl.from_json(population_share_json)