Histograms, Density Plots and Dot Plots
Mix.install([
{:vega_lite, "~> 0.1.5"},
{:kino_vega_lite, "~> 0.1.1"},
:jason
])
alias VegaLite, as: Vl
Histogram
# Histogram of IMDB ratings: the x channel bins the rating field and the
# y channel counts the records that fall into each bin.
movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

bar_chart =
  Vl.new()
  |> Vl.data_from_url(movies_url)
  |> Vl.mark(:bar)

bar_chart
|> Vl.encode_field(:x, "IMDB Rating", bin: true)
|> Vl.encode(:y, aggregate: :count)
Histogram (from Binned Data)
# Pre-binned input: consecutive bins of width 2 starting at 8, one count per bin.
bin_counts = [7, 29, 71, 127, 94, 54, 17, 5]

data =
  bin_counts
  |> Enum.with_index()
  |> Enum.map(fn {count, index} ->
    bin_start = 8 + 2 * index
    %{"bin_start" => bin_start, "bin_end" => bin_start + 2, "count" => count}
  end)

# The data is already binned, so x/x2 carry the bin edges and the bin
# definition is flagged as `binned: true` with the matching step.
binned_x = [bin: [binned: true, step: 2]]

Vl.new()
|> Vl.data_from_values(data)
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "bin_start", binned_x)
|> Vl.encode_field(:x2, "bin_end")
|> Vl.encode_field(:y, "count", type: :quantitative)
Log-scaled Histogram
Log-scaled Histogram. We may improve the support of this. See https://github.com/vega/vega-lite/issues/4792.
# Sample values spanning several orders of magnitude.
data =
  Enum.map([0.01, 0.1, 1, 1, 1, 1, 10, 10, 100, 500, 800], fn x ->
    %{"x" => x}
  end)

# Transform steps, applied in order:
#   1. take log10 of x
#   2. bin the log values
#   3./4. map the bin start/end back to the original scale as x1/x2
log_bin_steps = [
  [calculate: "log(datum.x)/log(10)", as: "log_x"],
  [bin: true, field: "log_x", as: "bin_log_x"],
  [calculate: "pow(10, datum.bin_log_x)", as: "x1"],
  [calculate: "pow(10, datum.bin_log_x_end)", as: "x2"]
]

base = Vl.data_from_values(Vl.new(), data)

transformed =
  Enum.reduce(log_bin_steps, base, fn step, vl ->
    Vl.transform(vl, step)
  end)

transformed
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "x1", scale: [type: :log, base: 10], axis: [tick_count: 5])
|> Vl.encode_field(:x2, "x2")
|> Vl.encode(:y, aggregate: :count)
Non-linear Histogram
A histogram for a data source that provides non-linear bins. Thanks to @Saba9 who helps create this example.
# One row per bin: {startTime, endTime, residency}. The bins have unequal
# widths and the last one is open-ended.
frame_time_bins = [
  {"0", "8.33", 0},
  {"8.33", "12.50", 0},
  {"12.50", "16.67", 31.17},
  {"16.67", "33.33", 38.96},
  {"33.33", "50.00", 6.49},
  {"50.00", "66.67", 2.9},
  {"66.67", "83.33", 2.6},
  {"83.33", "∞", 16.88}
]

data =
  for {start_time, end_time, residency} <- frame_time_bins do
    %{"startTime" => start_time, "endTime" => end_time, "residency" => residency}
  end

# Bin edges are treated as ordinal labels on a point scale without padding.
x_opts = [
  type: :ordinal,
  scale: [type: :point, padding: 0],
  axis: [label_angle: 0]
]

# Fixed 0-100 domain; axis labels get a "%" suffix.
y_opts = [
  type: :quantitative,
  scale: [domain: [0, 100]],
  axis: [label_expr: "datum.label + '%'"]
]

Vl.new(width: [step: 40], height: 100, title: "Distribution of Frame Render Time (ms)")
|> Vl.data_from_values(data)
|> Vl.mark(:bar, corner_radius_end: 0, orient: :vertical)
|> Vl.encode_field(:x, "startTime", x_opts)
|> Vl.encode_field(:x2, "endTime")
|> Vl.encode_field(:y, "residency", y_opts)
Relative Frequency
Relative frequency histogram. The data is binned with first transform. The number of values per bin and the total number are calculated in the second and third transform to calculate the relative frequency in the last transformation step.
cars_url = "https://vega.github.io/vega-lite/examples/data/cars.json"

# Count the records in each Horsepower bin.
count_per_bin = [
  aggregate: [[op: "count", as: "Count"]],
  groupby: ["bin_Horsepower", "bin_Horsepower_end"]
]

# Attach the sum of all bin counts to every row.
total_count = [joinaggregate: [[op: "sum", field: "Count", as: "TotalCount"]]]

binned_counts =
  Vl.new()
  |> Vl.data_from_url(cars_url)
  |> Vl.transform(bin: true, field: "Horsepower", as: "bin_Horsepower")
  |> Vl.transform(count_per_bin)
  |> Vl.transform(total_count)
  |> Vl.transform(calculate: "datum.Count/datum.TotalCount", as: "PercentOfTotal")

binned_counts
|> Vl.mark(:bar, tooltip: true)
|> Vl.encode_field(:x, "bin_Horsepower", bin: [binned: true], title: "Horsepower")
|> Vl.encode_field(:x2, "bin_Horsepower_end")
|> Vl.encode_field(:y, "PercentOfTotal", type: :quantitative, axis: [format: ".1~%"])
Density Plot
# Density estimate of IMDB ratings. The density transform emits "value" and
# "density" fields, which are plotted as an area.
movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"
rating_density = [density: "IMDB Rating", bandwidth: 0.3]

density_data =
  Vl.new(width: 400, height: 100)
  |> Vl.data_from_url(movies_url)
  |> Vl.transform(rating_density)

density_data
|> Vl.mark(:area)
|> Vl.encode_field(:x, "value", title: "IMDB Rating", type: :quantitative)
|> Vl.encode_field(:y, "density", type: :quantitative)
Stacked Density Estimates
penguins_url = "https://vega.github.io/vega-lite/examples/data/penguins.json"

# One density estimate per species, sampled over a fixed body-mass extent.
# Alternative without a fixed extent:
#   [density: "Body Mass (g)", groupby: ["Species"]]
per_species_density = [
  density: "Body Mass (g)",
  groupby: ["Species"],
  extent: [2500, 6500]
]

translucent_area =
  Vl.new(width: 400, height: 80, title: "Distribution of Body Mass of Penguins")
  |> Vl.data_from_url(penguins_url)
  |> Vl.mark(:area, opacity: 0.5)

translucent_area
|> Vl.transform(per_species_density)
|> Vl.encode_field(:x, "value", type: :quantitative, title: "Body Mass (g)")
|> Vl.encode_field(:y, "density", type: :quantitative)
|> Vl.encode_field(:color, "Species", type: :nominal)
2D Histogram Scatterplot
# Both rating fields are binned (at most 10 bins each); the circle size
# encodes the number of records in each x/y bin combination.
movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"
coarse_bins = [bin: [maxbins: 10]]

circles =
  Vl.new()
  |> Vl.data_from_url(movies_url)
  |> Vl.mark(:circle)

circles
|> Vl.encode_field(:x, "IMDB Rating", coarse_bins)
|> Vl.encode_field(:y, "Rotten Tomatoes Rating", coarse_bins)
|> Vl.encode(:size, aggregate: :count)
2D Histogram Heatmap
movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

# Keep only the records where both rating fields are valid.
both_ratings_valid = [
  and: [
    [field: "IMDB Rating", valid: true],
    [field: "Rotten Tomatoes Rating", valid: true]
  ]
]

rated_movies =
  Vl.new(width: 300, height: 200)
  |> Vl.data_from_url(movies_url)
  |> Vl.transform(filter: both_ratings_valid)

# Finely binned x/y cells, colored by the record count in each cell.
rated_movies
|> Vl.mark(:rect)
|> Vl.encode_field(:x, "IMDB Rating", type: :quantitative, bin: [maxbins: 60])
|> Vl.encode_field(:y, "Rotten Tomatoes Rating", type: :quantitative, bin: [maxbins: 40])
|> Vl.encode(:color, aggregate: :count, type: :quantitative)
|> Vl.config(view: [stroke: :transparent])
Cumulative Frequency Distribution
# Cumulative frequency of IMDB ratings: a window transform, sorted by rating,
# produces a running count that is plotted as an area.
Vl.new()
|> Vl.data_from_url("https://vega.github.io/vega-lite/examples/data/movies.json")
|> Vl.transform(
  sort: [[field: "IMDB Rating"]],
  window: [[op: "count", field: "count", as: "Cumulative Count"]],
  # The frame runs from the start of the data (unbounded) to the current row.
  # The unbounded edge must be `nil`, which is emitted as JSON null; the atom
  # `:null` would be emitted as the string "null".
  frame: [nil, 0]
)
|> Vl.mark(:area)
|> Vl.encode_field(:x, "IMDB Rating", type: :quantitative)
|> Vl.encode_field(:y, "Cumulative Count", type: :quantitative)

# Append `|> Vl.to_spec()` to the pipeline above to inspect the generated spec.
Layered Histogram and Cumulative Histogram
# Layered chart: a per-bin histogram of IMDB ratings drawn over the cumulative
# histogram of the same bins. Both layers share the x/x2 encoding defined on
# the parent view.
Vl.new()
|> Vl.data_from_url("https://vega.github.io/vega-lite/examples/data/movies.json")
|> Vl.transform(bin: true, field: "IMDB Rating", as: "bin_IMDB_Rating")
|> Vl.transform(
  aggregate: [[op: "count", as: "count"]],
  groupby: ["bin_IMDB_Rating", "bin_IMDB_Rating_end"]
)
# Drop the bin produced by records without a rating.
|> Vl.transform(filter: "datum.bin_IMDB_Rating !== null")
|> Vl.transform(
  sort: [[field: "bin_IMDB_Rating"]],
  window: [[op: "sum", field: "count", as: "Cumulative Count"]],
  # Unbounded start up to the current row. The unbounded edge must be `nil`
  # (JSON null); the atom `:null` would be emitted as the string "null".
  frame: [nil, 0]
)
|> Vl.encode_field(:x, "bin_IMDB_Rating",
  type: :quantitative,
  bin: :binned,
  # Scale properties must be a keyword list so they become a JSON object
  # ({"zero": false}); a plain list would be emitted as a JSON array.
  scale: [zero: false]
)
|> Vl.encode_field(:x2, "bin_IMDB_Rating_end")
|> Vl.layers([
  Vl.new()
  |> Vl.mark(:bar)
  |> Vl.encode_field(:y, "Cumulative Count", type: :quantitative),
  Vl.new()
  |> Vl.mark(:bar, opacity: 0.5, color: :yellow)
  |> Vl.encode_field(:y, "count", type: :quantitative)
])
Wilkinson Dot Plot
A Wilkinson Dot Plot
# Raw values given as {value, repetitions}: ten 1s, three 2s, two 3s, six 4s.
repetitions = [{1, 10}, {2, 3}, {3, 2}, {4, 6}]

data = [
  values: Enum.flat_map(repetitions, fn {value, times} -> List.duplicate(value, times) end)
]

# Rank the rows within each distinct value; the rank becomes the dot's
# vertical position.
rank_within_value = [window: [[op: "rank", as: "id"]], groupby: ["data"]]

dots =
  Vl.new()
  |> Vl.data(data)
  |> Vl.transform(rank_within_value)
  |> Vl.mark(:circle, opacity: 1)

dots
|> Vl.encode_field(:x, "data", type: :ordinal)
|> Vl.encode_field(:y, "id", type: :ordinal, sort: :descending)
Relative Bar Chart (Calculate Percentage of Total)
A bar graph showing what activites consume what percentage of the day.
# Hours spent per activity, as {activity, hours} pairs.
hours_by_activity = [
  {"Sleeping", 8},
  {"Eating", 2},
  {"TV", 4},
  {"Work", 8},
  {"Exercise", 2}
]

data =
  for {activity, hours} <- hours_by_activity do
    %{"Activity" => activity, "Time" => hours}
  end

# Attach the sum of all times to every row, then express each row's time as
# a percentage of that total.
total_time = [joinaggregate: [[op: "sum", field: "Time", as: "TotalTime"]]]
percent_of_total = [calculate: "datum.Time/datum.TotalTime * 100", as: "PercentOfTotal"]

with_percentages =
  Vl.new(height: [step: 12])
  |> Vl.data_from_values(data)
  |> Vl.transform(total_time)
  |> Vl.transform(percent_of_total)

with_percentages
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "PercentOfTotal", type: :quantitative, title: "% of total Time")
|> Vl.encode_field(:y, "Activity", type: :nominal)