Histograms, Density Plots and Dot Plots

single-view-histogram-plots.livemd

José Geraldo de Carvalho Pereira

@zgcarvalho

vega-lite-examples

Share to X

Share to Bluesky

More notebooks

Histograms, Density Plots and Dot Plots

Mix.install([
  {:vega_lite, "~> 0.1.5"},
  {:kino_vega_lite, "~> 0.1.1"},
  :jason
])

alias VegaLite, as: Vl

Histogram

# Source dataset for the histogram.
movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

# Bar per automatically computed "IMDB Rating" bin; bar height is the record count.
Vl.new()
|> Vl.mark(:bar)
|> Vl.data_from_url(movies_url)
|> Vl.encode(:y, aggregate: :count)
|> Vl.encode_field(:x, "IMDB Rating", bin: true)

Histogram (from Binned Data)

# Pre-binned data: every bin is 2 units wide, so each record is derived from
# its lower edge and its count.
bin_width = 2

data =
  for {bin_start, count} <- [
        {8, 7},
        {10, 29},
        {12, 71},
        {14, 127},
        {16, 94},
        {18, 54},
        {20, 17},
        {22, 5}
      ] do
    %{"bin_start" => bin_start, "bin_end" => bin_start + bin_width, "count" => count}
  end

# The data is already binned, so the x channel is flagged as `binned: true`
# and the bar spans from "bin_start" (x) to "bin_end" (x2).
Vl.new()
|> Vl.mark(:bar)
|> Vl.data_from_values(data)
|> Vl.encode_field(:y, "count", type: :quantitative)
|> Vl.encode_field(:x, "bin_start", bin: [binned: true, step: 2])
|> Vl.encode_field(:x2, "bin_end")

Log-scaled Histogram

Log-scaled Histogram. We may improve the support of this. See https://github.com/vega/vega-lite/issues/4792.

# Raw samples spanning several orders of magnitude, wrapped as %{"x" => value}.
data = Enum.map([0.01, 0.1, 1, 1, 1, 1, 10, 10, 100, 500, 800], &%{"x" => &1})

# Bin in log10 space, then map the bin edges back to the original scale so
# they can be drawn on a logarithmic x axis.
log_binned =
  Vl.new()
  |> Vl.data_from_values(data)
  # log10(x)
  |> Vl.transform(calculate: "log(datum.x)/log(10)", as: "log_x")
  # Binning writes "bin_log_x" (start) and "bin_log_x_end" (end).
  |> Vl.transform(bin: true, field: "log_x", as: "bin_log_x")
  # Convert both bin edges back with 10^edge.
  |> Vl.transform(calculate: "pow(10, datum.bin_log_x)", as: "x1")
  |> Vl.transform(calculate: "pow(10, datum.bin_log_x_end)", as: "x2")

log_binned
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "x1", scale: [type: :log, base: 10], axis: [tick_count: 5])
|> Vl.encode_field(:x2, "x2")
|> Vl.encode(:y, aggregate: :count)

Non-linear Histogram

A histogram for a data source that provides non-linear bins. Thanks to @Saba9, who helped create this example.

# Bin boundaries (kept as strings, exactly as they are shown on the axis).
# Each bin runs from one edge to the next; the last bin is open-ended.
edges = ["0", "8.33", "12.50", "16.67", "33.33", "50.00", "66.67", "83.33", "∞"]

# Residency value for each consecutive pair of edges.
residencies = [0, 0, 31.17, 38.96, 6.49, 2.9, 2.6, 16.88]

data =
  edges
  |> Enum.chunk_every(2, 1, :discard)
  |> Enum.zip(residencies)
  |> Enum.map(fn {[start_time, end_time], residency} ->
    %{"startTime" => start_time, "endTime" => end_time, "residency" => residency}
  end)

# x is an ordinal point scale without padding, so bars sit between
# consecutive edge labels; y is fixed to 0..100 and labelled with "%".
x_options = [
  type: :ordinal,
  scale: [type: :point, padding: 0],
  axis: [label_angle: 0]
]

y_options = [
  type: :quantitative,
  scale: [domain: [0, 100]],
  axis: [label_expr: "datum.label + '%'"]
]

Vl.new(width: [step: 40], height: 100, title: "Distribution of Frame Render Time (ms)")
|> Vl.data_from_values(data)
|> Vl.mark(:bar, corner_radius_end: 0, orient: :vertical)
|> Vl.encode_field(:x, "startTime", x_options)
|> Vl.encode_field(:x2, "endTime")
|> Vl.encode_field(:y, "residency", y_options)

Relative Frequency

Relative frequency histogram. The data is binned in the first transform. The number of values per bin and the total number of values are computed in the second and third transforms, and the last transform divides them to obtain the relative frequency.

cars_url = "https://vega.github.io/vega-lite/examples/data/cars.json"

# Step 1-4: bin, count per bin, total over all bins, share of the total.
relative_frequencies =
  Vl.new()
  |> Vl.data_from_url(cars_url)
  # 1. Bin "Horsepower" into "bin_Horsepower" / "bin_Horsepower_end".
  |> Vl.transform(bin: true, field: "Horsepower", as: "bin_Horsepower")
  # 2. Count the records in every bin.
  |> Vl.transform(
    aggregate: [[op: "count", as: "Count"]],
    groupby: ["bin_Horsepower", "bin_Horsepower_end"]
  )
  # 3. Attach the sum of all bin counts to every row.
  |> Vl.transform(joinaggregate: [[op: "sum", field: "Count", as: "TotalCount"]])
  # 4. Relative frequency of each bin.
  |> Vl.transform(calculate: "datum.Count/datum.TotalCount", as: "PercentOfTotal")

relative_frequencies
|> Vl.mark(:bar, tooltip: true)
|> Vl.encode_field(:y, "PercentOfTotal", type: :quantitative, axis: [format: ".1~%"])
|> Vl.encode_field(:x, "bin_Horsepower", bin: [binned: true], title: "Horsepower")
|> Vl.encode_field(:x2, "bin_Horsepower_end")

Density Plot

movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

# The density transform outputs "value" / "density" fields, which are drawn
# as an area chart.
Vl.new(width: 400, height: 100)
|> Vl.mark(:area)
|> Vl.data_from_url(movies_url)
|> Vl.transform(density: "IMDB Rating", bandwidth: 0.3)
|> Vl.encode_field(:y, "density", type: :quantitative)
|> Vl.encode_field(:x, "value", title: "IMDB Rating", type: :quantitative)

Stacked Density Estimates

penguins_url = "https://vega.github.io/vega-lite/examples/data/penguins.json"

# One density estimate per species over a fixed body-mass extent.
# Alternative without a fixed extent:
#   Vl.transform(density: "Body Mass (g)", groupby: ["Species"])
density_options = [density: "Body Mass (g)", groupby: ["Species"], extent: [2500, 6500]]

Vl.new(width: 400, height: 80, title: "Distribution of Body Mass of Penguins")
|> Vl.data_from_url(penguins_url)
|> Vl.transform(density_options)
|> Vl.mark(:area, opacity: 0.5)
|> Vl.encode_field(:color, "Species", type: :nominal)
|> Vl.encode_field(:x, "value", type: :quantitative, title: "Body Mass (g)")
|> Vl.encode_field(:y, "density", type: :quantitative)

2D Histogram Scatterplot

movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

# Both rating axes are binned with the same upper limit on the number of bins.
ten_bins = [maxbins: 10]

# One circle per (x bin, y bin) cell; the circle size is the record count.
Vl.new()
|> Vl.data_from_url(movies_url)
|> Vl.mark(:circle)
|> Vl.encode(:size, aggregate: :count)
|> Vl.encode_field(:x, "IMDB Rating", bin: ten_bins)
|> Vl.encode_field(:y, "Rotten Tomatoes Rating", bin: ten_bins)

2D Histogram Heatmap

movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

# Keep only the records where both ratings are valid.
both_ratings_valid = [
  and:
    for rating_field <- ["IMDB Rating", "Rotten Tomatoes Rating"] do
      [field: rating_field, valid: true]
    end
]

# One rectangle per (x bin, y bin) cell, coloured by the record count.
Vl.new(width: 300, height: 200)
|> Vl.data_from_url(movies_url)
|> Vl.transform(filter: both_ratings_valid)
|> Vl.mark(:rect)
|> Vl.encode(:color, aggregate: :count, type: :quantitative)
|> Vl.encode_field(:x, "IMDB Rating", type: :quantitative, bin: [maxbins: 60])
|> Vl.encode_field(:y, "Rotten Tomatoes Rating", type: :quantitative, bin: [maxbins: 40])
|> Vl.config(view: [stroke: :transparent])

Cumulative Frequency Distribution

# Cumulative frequency distribution of "IMDB Rating": records are sorted by
# rating and a running count is written to "Cumulative Count".
Vl.new()
|> Vl.data_from_url("https://vega.github.io/vega-lite/examples/data/movies.json")
|> Vl.transform(
  sort: [[field: "IMDB Rating"]],
  window: [[op: "count", field: "count", as: "Cumulative Count"]],
  # The frame must be [null, 0] in the generated spec (unbounded start up to
  # the current row). Use `nil`, which is emitted as JSON null; the atom
  # `:null` would be serialised as the string "null".
  frame: [nil, 0]
)
|> Vl.mark(:area)
|> Vl.encode_field(:x, "IMDB Rating", type: :quantitative)
|> Vl.encode_field(:y, "Cumulative Count", type: :quantitative)

# |> Vl.to_spec()

Layered Histogram and Cumulative Histogram

# Layered chart: a cumulative histogram of "IMDB Rating" with the plain
# per-bin histogram drawn on top of it in semi-transparent yellow.
Vl.new()
|> Vl.data_from_url("https://vega.github.io/vega-lite/examples/data/movies.json")
# Bin the ratings into "bin_IMDB_Rating" / "bin_IMDB_Rating_end".
|> Vl.transform(bin: true, field: "IMDB Rating", as: "bin_IMDB_Rating")
# Count the records in every bin.
|> Vl.transform(
  aggregate: [[op: "count", as: "count"]],
  groupby: ["bin_IMDB_Rating", "bin_IMDB_Rating_end"]
)
# Drop the group of records that have no rating.
|> Vl.transform(filter: "datum.bin_IMDB_Rating !== null")
# Running sum of the per-bin counts, ordered by bin start.
|> Vl.transform(
  sort: [[field: "bin_IMDB_Rating"]],
  window: [[op: "sum", field: "count", as: "Cumulative Count"]],
  # `nil` is emitted as JSON null (unbounded frame start); the atom `:null`
  # would be serialised as the string "null".
  frame: [nil, 0]
)
|> Vl.encode_field(:x, "bin_IMDB_Rating",
  type: :quantitative,
  bin: :binned,
  # Keyword list so the spec gets the object {"zero": false}; the plain list
  # ["zero", false] would be emitted as a JSON array instead.
  scale: [zero: false]
)
|> Vl.encode_field(:x2, "bin_IMDB_Rating_end")
|> Vl.layers([
  # Bottom layer: cumulative counts.
  Vl.new()
  |> Vl.mark(:bar)
  |> Vl.encode_field(:y, "Cumulative Count", type: :quantitative),
  # Top layer: per-bin counts.
  Vl.new()
  |> Vl.mark(:bar, opacity: 0.5, color: :yellow)
  |> Vl.encode_field(:y, "count", type: :quantitative)
])

Wilkinson Dot Plot

A Wilkinson Dot Plot

# Inline data given as bare values: ten 1s, three 2s, two 3s and six 4s.
observations =
  [{1, 10}, {2, 3}, {3, 2}, {4, 6}]
  |> Enum.flat_map(fn {value, times} -> List.duplicate(value, times) end)

data = [values: observations]

# The encodings refer to the values through the "data" field. Each value is
# ranked within its own group so equal values stack as separate dots.
Vl.new()
|> Vl.data(data)
|> Vl.transform(window: [[op: "rank", as: "id"]], groupby: ["data"])
|> Vl.mark(:circle, opacity: 1)
|> Vl.encode_field(:y, "id", type: :ordinal, sort: :descending)
|> Vl.encode_field(:x, "data", type: :ordinal)

Relative Bar Chart (Calculate Percentage of Total)

A bar graph showing which activities consume what percentage of the day.

# Time spent on each activity.
data =
  for {activity, time} <- [
        {"Sleeping", 8},
        {"Eating", 2},
        {"TV", 4},
        {"Work", 8},
        {"Exercise", 2}
      ] do
    %{"Activity" => activity, "Time" => time}
  end

# Attach the total time to every row, then express each activity as a
# percentage of that total.
with_percentages =
  Vl.new(height: [step: 12])
  |> Vl.data_from_values(data)
  |> Vl.transform(joinaggregate: [[op: "sum", field: "Time", as: "TotalTime"]])
  |> Vl.transform(calculate: "datum.Time/datum.TotalTime * 100", as: "PercentOfTotal")

with_percentages
|> Vl.mark(:bar)
|> Vl.encode_field(:y, "Activity", type: :nominal)
|> Vl.encode_field(:x, "PercentOfTotal", type: :quantitative, title: "% of total Time")

Other notebooks:

@andyl

elix_util

Examples

vegalite.livemd

tutorial data-science intermediate vega_lite jason

2022-8-18
@TomBers

livebookNotes

Attractors

attractors.livemd

advanced data-science decimal vega_lite kino

2022-8-18
@TomBers

livebookNotes

Fun with Graphs

graphs.livemd

tutorial advanced intermediate vega_lite kino math

2022-8-18
@TomBers

livebookNotes

Epicycloid - draw Curves with Straight Lines

Epicycloid.livemd

tutorial intermediate vega_lite kino math

2022-8-18
Nicolò G.
@nickgnd

programming-machine-learn...

Chapter 19: beyond vanilla networks

beyond_vanilla_networks.livemd

advanced data-science exla nx axon kino kino_vega_lite vega_lite scidata nx_image table_rex

2023-3-14
@DockYard-Academy

curriculum

PicChat: Messages

deprecated_liveview_and_ecto.livemd

tutorial advanced intermediate gen-server otp jason kino youtube hidden_cell

2023-6-5
Nick C
@flowerett

aoc

Day9

day9.livemd

tutorial advanced testing kino_aoc benchee nimble_parsec libgraph math

2023-12-11

Back