6_Data_Visualization_Without_Smart_Chart
Mix.install([
{:kino, "~> 0.14.0"},
{:kino_explorer, "~> 0.1.20"},
{:kino_vega_lite, "~> 0.1.11"},
{:tucan, "~> 0.4.1"}
])
Data
The associated files are from https://survey.stackoverflow.co/
Code base from https://github.com/hugobarauna/livebook-notebooks/blob/main/ci_green_streak.livemd
Basic exploration
# Read the survey schema CSV attached to this notebook into a DataFrame.
schema = Explorer.DataFrame.from_csv!(Kino.FS.file_path("survey_results_schema.csv"))

# End the cell with an empty output instead of rendering the DataFrame.
Kino.nothing()
# Read the public survey results CSV attached to this notebook into a DataFrame.
data = Explorer.DataFrame.from_csv!(Kino.FS.file_path("survey_results_public.csv"))

# End the cell with an empty output instead of rendering the DataFrame.
Kino.nothing()
Professional developers by age
There’s a hypothesis that developers in the Elixir community lean toward the older side. I was curious about that, and also about how it compares to other communities.
require Explorer.DataFrame
# Scatter plot of petal width vs. petal length for the `:iris` dataset, with the
# y axis offset by 10 and grid lines turned off, returned as a raw spec.
# NOTE(review): this cell does not use the survey data — presumably a quick
# check that Tucan works; confirm it is still wanted.
:iris
|> Tucan.scatter("petal_width", "petal_length")
|> Tucan.Axes.set_offset(:y, 10)
|> Tucan.Grid.set_enabled(false)
|> VegaLite.to_spec()
# Age distribution of professional developers who have worked with `language`.
language = "Elixir"

# Narrow the survey in Explorer before handing it to VegaLite, so that only the
# matching respondents' "Age" values are embedded in the chart spec:
#   * drop rows whose "YearsCodePro" is the literal string "NA";
#   * keep rows whose "LanguageHaveWorkedWith" contains `language`.
# NOTE(review): `contains/2` is a substring match, so a language name that is
# embedded in another entry of that column would also match — confirm this is
# acceptable for the languages charted here.
developers =
  data
  |> Explorer.DataFrame.filter(col("YearsCodePro") != "NA")
  |> Explorer.DataFrame.filter(contains(col("LanguageHaveWorkedWith"), ^language))
  |> Explorer.DataFrame.select(["Age"])

VegaLite.new(title: "Professional #{language} developers by age", width: 400, height: 200)
|> VegaLite.data_from_values(developers)
# Number of respondents per age bracket.
|> VegaLite.transform(aggregate: [[op: "count", field: "Age", as: "count"]], groupby: ["Age"])
# Unbounded window frame: every row carries the sum of all bracket counts.
|> VegaLite.transform(
  window: [[op: "sum", field: "count", as: "total_count"]],
  frame: [nil, nil]
)
|> VegaLite.transform(calculate: "(datum.count / datum.total_count)", as: "percentage")
|> Tucan.layers([
  # Layer 1: share of respondents per age bracket, in a fixed bracket order.
  Tucan.new()
  |> Tucan.bar("Age", "percentage",
    tooltip: true,
    x: [
      axis: [label_font_size: 14, label_angle: -60],
      sort: [
        "Under 18 years old",
        "18-24 years old",
        "25-34 years old",
        "35-44 years old",
        "45-54 years old",
        "55-64 years old",
        "65 years or older",
        "Prefer not to say"
      ]
    ],
    y: [
      axis: [format: ".0%", title: nil],
      scale: [domain: [0, 0.45]]
    ]
  ),
  # Layer 2: "(n = <total>)" label anchored to the right edge of the plot.
  VegaLite.new()
  |> VegaLite.transform(
    calculate: "'(n = ' + format(datum.total_count, ',') + ')'",
    as: "formatted_count"
  )
  |> VegaLite.mark(:text,
    color: "grey",
    font_size: 14,
    font_weight: "200",
    dx: -50
  )
  |> VegaLite.encode_field(:text, "formatted_count")
  |> VegaLite.encode(:x, value: "width")
  |> VegaLite.encode(:y, value: 20)
])
# Builds one "developers by age" chart per language and binds the list to
# `vl_specs`.

# Loop-invariant: rows whose "YearsCodePro" is not the literal string "NA".
# Computed once instead of once per language.
professionals = Explorer.DataFrame.filter(data, col("YearsCodePro") != "NA")

# Fixed display order for the age brackets on the x axis.
age_order = [
  "Under 18 years old",
  "18-24 years old",
  "25-34 years old",
  "35-44 years old",
  "45-54 years old",
  "55-64 years old",
  "65 years or older",
  "Prefer not to say"
]

vl_specs =
  Enum.map(["Elixir", "Ruby", "Python", "JavaScript", "Go"], fn language ->
    # Filter in Explorer so only this language's "Age" values are embedded in
    # the chart spec, rather than the whole survey once per chart.
    # NOTE(review): `contains/2` is a substring match, so a language name that
    # is embedded in another entry of that column would also match — confirm.
    developers =
      professionals
      |> Explorer.DataFrame.filter(contains(col("LanguageHaveWorkedWith"), ^language))
      |> Explorer.DataFrame.select(["Age"])

    VegaLite.new(title: "Professional #{language} developers by age", width: 400, height: 200)
    |> VegaLite.data_from_values(developers)
    # Number of respondents per age bracket.
    |> VegaLite.transform(
      aggregate: [[op: "count", field: "Age", as: "count"]],
      groupby: ["Age"]
    )
    # Unbounded window frame: every row carries the sum of all bracket counts.
    |> VegaLite.transform(
      window: [[op: "sum", field: "count", as: "total_count"]],
      frame: [nil, nil]
    )
    |> VegaLite.transform(calculate: "(datum.count / datum.total_count)", as: "percentage")
    |> Tucan.layers([
      # Layer 1: share of respondents per age bracket.
      Tucan.new()
      |> Tucan.bar("Age", "percentage",
        tooltip: true,
        x: [
          axis: [label_font_size: 14, label_angle: -60],
          sort: age_order
        ],
        y: [
          axis: [format: ".0%", title: nil],
          scale: [domain: [0, 0.45]]
        ]
      ),
      # Layer 2: "(n = <total>)" label anchored to the right edge of the plot.
      VegaLite.new()
      |> VegaLite.transform(
        calculate: "'(n = ' + format(datum.total_count, ',') + ')'",
        as: "formatted_count"
      )
      |> VegaLite.mark(:text,
        color: "grey",
        font_size: 14,
        font_weight: "200",
        dx: -50
      )
      |> VegaLite.encode_field(:text, "formatted_count")
      |> VegaLite.encode(:x, value: "width")
      |> VegaLite.encode(:y, value: 20)
    ])
  end)

# End the cell with an empty output; the charts are rendered by a later cell.
Kino.nothing()
import Kino.Shorts
# Boxed layout: a markdown heading on top of a two-column grid of the charts
# held in `vl_specs`.
[
  markdown("## Professional developers by age"),
  Kino.Layout.grid(vl_specs, columns: 2)
]
|> grid(boxed: true)
Professional developers by years coding (NOT including education)
# Builds one "developers by years coding" histogram per language and binds the
# list to `vl_specs`.

# Loop-invariant: rows whose "YearsCodePro" is not the literal string "NA".
# Computed once instead of once per language.
professionals = Explorer.DataFrame.filter(data, col("YearsCodePro") != "NA")

vl_specs =
  Enum.map(["Elixir", "Ruby", "Python", "JavaScript", "Go"], fn language ->
    # Filter in Explorer so only this language's "YearsCodePro" values are
    # embedded in the chart spec, rather than the whole survey once per chart.
    # NOTE(review): `contains/2` is a substring match, so a language name that
    # is embedded in another entry of that column would also match — confirm.
    developers =
      professionals
      |> Explorer.DataFrame.filter(contains(col("LanguageHaveWorkedWith"), ^language))
      |> Explorer.DataFrame.select(["YearsCodePro"])

    VegaLite.new(
      title: "Professional #{language} developers by years coding",
      width: 400,
      height: 200
    )
    |> VegaLite.data_from_values(developers)
    |> Tucan.layers([
      # Layer 1: relative histogram of "YearsCodePro" with a bin step of 5.
      Tucan.new()
      |> Tucan.histogram("YearsCodePro",
        step: 5,
        relative: true,
        tooltip: :data,
        width: 400,
        height: 200,
        x: [
          axis: [title_font_size: 14]
        ],
        y: [scale: [domain: [0, 0.35]]]
      )
      |> Tucan.Axes.set_x_title("Years coding professionally, NOT including education")
      |> Tucan.Axes.set_y_title(""),
      # Layer 2: "(n = <total>)" label anchored to the right edge of the plot.
      VegaLite.new()
      |> VegaLite.transform(aggregate: [[op: "count", as: "total_count"]])
      |> VegaLite.transform(
        calculate: "'(n = ' + format(datum.total_count, ',') + ')'",
        as: "formatted_count"
      )
      |> VegaLite.mark(:text,
        color: "grey",
        font_size: 14,
        font_weight: "200",
        dx: -50
      )
      |> VegaLite.encode_field(:text, "formatted_count")
      |> VegaLite.encode(:x, value: "width")
      |> VegaLite.encode(:y, value: 20)
    ])
  end)

# End the cell with an empty output; the charts are rendered by a later cell.
Kino.nothing()
# Boxed layout: a markdown heading on top of a two-column grid of the charts
# held in `vl_specs`.
[
  markdown("## Professional developers by years coding (NOT including education)"),
  Kino.Layout.grid(vl_specs, columns: 2)
]
|> grid(boxed: true)