Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Histograms, Density Plots and Dot Plots

single-view-histogram-plots.livemd

Histograms, Density Plots and Dot Plots

Mix.install([
  {:vega_lite, "~> 0.1.5"},
  {:kino_vega_lite, "~> 0.1.1"},
  :jason
])

alias VegaLite, as: Vl

Histogram

# Histogram of IMDB ratings: the x channel bins the field, and the y channel
# counts the records that fall into each bin.
movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

Vl.new()
|> Vl.data_from_url(movies_url)
|> Vl.encode_field(:x, "IMDB Rating", bin: true)
|> Vl.encode(:y, aggregate: :count)
|> Vl.mark(:bar)

Histogram (from Binned Data)

# Pre-binned data: every bin is 2 units wide, so the end of each bin is
# derived from its start. Each tuple is {bin_start, count}.
data =
  for {bin_start, count} <- [
        {8, 7},
        {10, 29},
        {12, 71},
        {14, 127},
        {16, 94},
        {18, 54},
        {20, 17},
        {22, 5}
      ] do
    %{"bin_start" => bin_start, "bin_end" => bin_start + 2, "count" => count}
  end

# The x channel is flagged as already binned (step 2), with x2 marking the
# end of each bar; y plots the precomputed counts.
Vl.new()
|> Vl.data_from_values(data)
|> Vl.encode_field(:x, "bin_start", bin: [binned: true, step: 2])
|> Vl.encode_field(:x2, "bin_end")
|> Vl.encode_field(:y, "count", type: :quantitative)
|> Vl.mark(:bar)

Log-scaled Histogram

A log-scaled histogram. Vega-Lite may improve built-in support for this in the future; see https://github.com/vega/vega-lite/issues/4792.

# Sample values spanning several orders of magnitude, wrapped as %{"x" => v}.
data = Enum.map([0.01, 0.1, 1, 1, 1, 1, 10, 10, 100, 500, 800], &%{"x" => &1})

# Pipeline:
#   1. compute log10(x),
#   2. bin the log values,
#   3. convert the bin start/end back with pow(10, ...),
#   4. draw bars between x1 and x2 on a base-10 log scale, counting records.
Vl.new()
|> Vl.data_from_values(data)
|> Vl.mark(:bar)
|> Vl.transform(calculate: "log(datum.x)/log(10)", as: "log_x")
|> Vl.transform(bin: true, field: "log_x", as: "bin_log_x")
|> Vl.transform(calculate: "pow(10, datum.bin_log_x)", as: "x1")
|> Vl.transform(calculate: "pow(10, datum.bin_log_x_end)", as: "x2")
|> Vl.encode_field(:x, "x1",
  scale: [type: :log, base: 10],
  axis: [tick_count: 5]
)
|> Vl.encode_field(:x2, "x2")
|> Vl.encode(:y, aggregate: :count)

Non-linear Histogram

A histogram for a data source that provides non-linear bins. Thanks to @Saba9, who helped create this example.

# Frame render time buckets as {startTime, endTime, residency} tuples.
# Start and end times are kept as strings because the x axis is ordinal.
buckets = [
  {"0", "8.33", 0},
  {"8.33", "12.50", 0},
  {"12.50", "16.67", 31.17},
  {"16.67", "33.33", 38.96},
  {"33.33", "50.00", 6.49},
  {"50.00", "66.67", 2.9},
  {"66.67", "83.33", 2.6},
  {"83.33", "∞", 16.88}
]

data =
  Enum.map(buckets, fn {start_time, end_time, residency} ->
    %{"startTime" => start_time, "endTime" => end_time, "residency" => residency}
  end)

# Bars span startTime..endTime on an unpadded point scale; the y axis is
# fixed to 0..100 and its labels get a "%" suffix.
Vl.new(
  title: "Distribution of Frame Render Time (ms)",
  width: [step: 40],
  height: 100
)
|> Vl.data_from_values(data)
|> Vl.encode_field(:x, "startTime",
  type: :ordinal,
  scale: [type: :point, padding: 0],
  axis: [label_angle: 0]
)
|> Vl.encode_field(:x2, "endTime")
|> Vl.encode_field(:y, "residency",
  type: :quantitative,
  scale: [domain: [0, 100]],
  axis: [label_expr: "datum.label + '%'"]
)
|> Vl.mark(:bar, corner_radius_end: 0, orient: :vertical)

Relative Frequency

Relative frequency histogram. The data is binned in the first transform. The number of values per bin and the total number of values are calculated in the second and third transforms, and the relative frequency is calculated in the last transform.

# Relative frequency histogram of Horsepower.
cars_url = "https://vega.github.io/vega-lite/examples/data/cars.json"

Vl.new()
|> Vl.data_from_url(cars_url)
|> Vl.mark(:bar, tooltip: true)
# 1. Bin the Horsepower field.
|> Vl.transform(bin: true, field: "Horsepower", as: "bin_Horsepower")
# 2. Count the records in each bin.
|> Vl.transform(
  aggregate: [[op: "count", as: "Count"]],
  groupby: ["bin_Horsepower", "bin_Horsepower_end"]
)
# 3. Attach the sum of all bin counts to every row.
|> Vl.transform(joinaggregate: [[op: "sum", field: "Count", as: "TotalCount"]])
# 4. Divide each bin count by the total.
|> Vl.transform(calculate: "datum.Count/datum.TotalCount", as: "PercentOfTotal")
|> Vl.encode_field(:x, "bin_Horsepower",
  bin: [binned: true],
  title: "Horsepower"
)
|> Vl.encode_field(:x2, "bin_Horsepower_end")
|> Vl.encode_field(:y, "PercentOfTotal",
  type: :quantitative,
  axis: [format: ".1~%"]
)

Density Plot

# Density estimate of IMDB ratings (bandwidth 0.3) drawn as an area chart.
# The density transform emits "value" and "density" fields, plotted on x and y.
movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

Vl.new(width: 400, height: 100)
|> Vl.data_from_url(movies_url)
|> Vl.transform(density: "IMDB Rating", bandwidth: 0.3)
|> Vl.encode_field(:x, "value", type: :quantitative, title: "IMDB Rating")
|> Vl.encode_field(:y, "density", type: :quantitative)
|> Vl.mark(:area)

Stacked Density Estimates

# Per-species density estimates of penguin body mass, drawn as
# semi-transparent areas colored by species.
penguins_url = "https://vega.github.io/vega-lite/examples/data/penguins.json"

Vl.new(
  title: "Distribution of Body Mass of Penguins",
  width: 400,
  height: 80
)
|> Vl.data_from_url(penguins_url)
# The density is evaluated over the fixed extent 2500..6500.
# Variant without an explicit extent:
#   |> Vl.transform(density: "Body Mass (g)", groupby: ["Species"])
|> Vl.transform(
  density: "Body Mass (g)",
  groupby: ["Species"],
  extent: [2500, 6500]
)
|> Vl.mark(:area, opacity: 0.5)
|> Vl.encode_field(:x, "value", type: :quantitative, title: "Body Mass (g)")
|> Vl.encode_field(:y, "density", type: :quantitative)
|> Vl.encode_field(:color, "Species", type: :nominal)

2D Histogram Scatterplot

# 2D histogram as a scatterplot: both rating fields are binned (at most 10
# bins each) and the circle size encodes the record count per cell.
movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

Vl.new()
|> Vl.data_from_url(movies_url)
|> Vl.encode_field(:x, "IMDB Rating", bin: [maxbins: 10])
|> Vl.encode_field(:y, "Rotten Tomatoes Rating", bin: [maxbins: 10])
|> Vl.encode(:size, aggregate: :count)
|> Vl.mark(:circle)

2D Histogram Heatmap

# 2D histogram as a heatmap: rectangles over binned IMDB / Rotten Tomatoes
# ratings, colored by the record count in each cell.
movies_url = "https://vega.github.io/vega-lite/examples/data/movies.json"

# Keep only records where both rating fields are valid.
both_ratings_valid = [
  and: [
    [field: "IMDB Rating", valid: true],
    [field: "Rotten Tomatoes Rating", valid: true]
  ]
]

Vl.new(width: 300, height: 200)
|> Vl.data_from_url(movies_url)
|> Vl.transform(filter: both_ratings_valid)
|> Vl.encode_field(:x, "IMDB Rating", type: :quantitative, bin: [maxbins: 60])
|> Vl.encode_field(:y, "Rotten Tomatoes Rating", type: :quantitative, bin: [maxbins: 40])
|> Vl.encode(:color, aggregate: :count, type: :quantitative)
|> Vl.mark(:rect)
|> Vl.config(view: [stroke: :transparent])

Cumulative Frequency Distribution

# Cumulative frequency distribution of IMDB ratings, drawn as an area chart.
# The window transform sorts the records by rating and, for each record,
# counts the rows from the start of the data up to and including the current
# row, storing the result as "Cumulative Count".
Vl.new()
|> Vl.data_from_url("https://vega.github.io/vega-lite/examples/data/movies.json")
|> Vl.transform(
  sort: [[field: "IMDB Rating"]],
  window: [[op: "count", field: "count", as: "Cumulative Count"]],
  # The frame start must be JSON null (unbounded), which is `nil` in Elixir.
  # The atom `:null` would be serialized as the string "null" instead.
  frame: [nil, 0]
)
|> Vl.mark(:area)
|> Vl.encode_field(:x, "IMDB Rating", type: :quantitative)
|> Vl.encode_field(:y, "Cumulative Count", type: :quantitative)

# |> Vl.to_spec()

Layered Histogram and Cumulative Histogram

# Layered chart: a cumulative histogram of IMDB ratings with the plain
# histogram drawn on top of it in semi-transparent yellow.
Vl.new()
|> Vl.data_from_url("https://vega.github.io/vega-lite/examples/data/movies.json")
# Bin the ratings, then count the records per bin.
|> Vl.transform(bin: true, field: "IMDB Rating", as: "bin_IMDB_Rating")
|> Vl.transform(
  aggregate: [[op: "count", as: "count"]],
  groupby: ["bin_IMDB_Rating", "bin_IMDB_Rating_end"]
)
# Drop the bin holding records without a rating.
|> Vl.transform(filter: "datum.bin_IMDB_Rating !== null")
# Running sum of the per-bin counts, ordered by bin start.
|> Vl.transform(
  sort: [[field: "bin_IMDB_Rating"]],
  window: [[op: "sum", field: "count", as: "Cumulative Count"]],
  # The frame start must be JSON null (unbounded), which is `nil` in Elixir.
  # The atom `:null` would be serialized as the string "null" instead.
  frame: [nil, 0]
)
# Shared x encoding for both layers.
|> Vl.encode_field(:x, "bin_IMDB_Rating",
  type: :quantitative,
  bin: :binned,
  # Must be a keyword list so it serializes to {"zero": false}; the plain
  # list ["zero", false] would serialize to a JSON array instead.
  scale: [zero: false]
)
|> Vl.encode_field(:x2, "bin_IMDB_Rating_end")
|> Vl.layers([
  # Bottom layer: cumulative counts.
  Vl.new()
  |> Vl.mark(:bar)
  |> Vl.encode_field(:y, "Cumulative Count", type: :quantitative),
  # Top layer: per-bin counts.
  Vl.new()
  |> Vl.mark(:bar, opacity: 0.5, color: :yellow)
  |> Vl.encode_field(:y, "count", type: :quantitative)
])

Wilkinson Dot Plot

A Wilkinson Dot Plot

# Raw observations: ten 1s, three 2s, two 3s and six 4s, passed as inline
# `values` for the data specification.
observations =
  List.duplicate(1, 10) ++
    List.duplicate(2, 3) ++
    List.duplicate(3, 2) ++
    List.duplicate(4, 6)

data = [values: observations]

# The x and groupby field is named "data". The window transform ranks the
# rows inside each group, and that rank ("id") stacks the dots vertically.
Vl.new()
|> Vl.data(data)
|> Vl.transform(groupby: ["data"], window: [[op: "rank", as: "id"]])
|> Vl.encode_field(:y, "id", type: :ordinal, sort: :descending)
|> Vl.encode_field(:x, "data", type: :ordinal)
|> Vl.mark(:circle, opacity: 1)

Relative Bar Chart (Calculate Percentage of Total)

A bar graph showing what percentage of the day each activity consumes.

# Daily activities as {activity, hours} pairs.
data =
  for {activity, hours} <- [
        {"Sleeping", 8},
        {"Eating", 2},
        {"TV", 4},
        {"Work", 8},
        {"Exercise", 2}
      ] do
    %{"Activity" => activity, "Time" => hours}
  end

# Attach the summed time to every row, turn each activity's time into a
# percentage of that total, and draw one horizontal bar per activity.
Vl.new(height: [step: 12])
|> Vl.data_from_values(data)
|> Vl.transform(joinaggregate: [[op: "sum", field: "Time", as: "TotalTime"]])
|> Vl.transform(calculate: "datum.Time/datum.TotalTime * 100", as: "PercentOfTotal")
|> Vl.encode_field(:x, "PercentOfTotal",
  type: :quantitative,
  title: "% of total Time"
)
|> Vl.encode_field(:y, "Activity", type: :nominal)
|> Vl.mark(:bar)