LiveBook Monitoring Dashboard
Mix.install([
{:vix, "~> 0.35.0"},
{:req, "~> 0.5.16"},
{:kino, "~> 0.17.0"},
{:gnat, "~> 1.11"},
{:ex_image_info, "~> 1.0"}
])
Setup
Dashboard Grid Layout
# Build one Markdown card per observability tool (title links to its UI),
# then lay the cards out in a three-column grid.
dashboard_cards =
  Enum.map(
    [
      "### [Jaeger](http://jaeger.localhost:8080)\nDistributed Tracing",
      "### [Grafana](http://grafana.localhost:8080)\nUnified Observability",
      "### [Prometheus](http://prometheus.localhost:8080)\nMetrics & Alerts",
      "### [MinIO](http://minio.localhost:8080)\nObject Storage"
    ],
    &Kino.Markdown.new/1
  )

Kino.Layout.grid(dashboard_cards, columns: 3)
Connect to the services
# Set the Erlang distribution cookie to match the one used by the services.
Node.set_cookie(:msvc_dev_cookie_change_in_production)

# Logical service name => fully qualified node name.
services_map = %{
  client: :"client_svc@client_svc.msvc",
  user: :"user_svc@user_svc.msvc",
  email: :"email_svc@email_svc.msvc",
  image: :"image_svc@image_svc.msvc"
}

# Every outcome Node.connect/1 can produce; each one is a column of the table.
connection_statuses = [:connected, :failed, :ignored]

# One row per node. The node name is placed in the column matching its outcome
# and the other columns are nil, so every row has the same three keys. The
# previous one-key rows stopped sharing a shape as soon as one connection
# failed, which is exactly when the table matters most.
results =
  for node <- Map.values(services_map) do
    outcome =
      case Node.connect(node) do
        true -> :connected
        false -> :failed
        # Returned when the local node is not alive (distribution not started).
        :ignored -> :ignored
      end

    Map.new(connection_statuses, fn status ->
      {status, if(status == outcome, do: node)}
    end)
  end

Kino.DataTable.new(results, keys: connection_statuses)
Health checks
Query the `/health` endpoint of every Elixir service from the Docker network
defmodule HealthCheck do
  @moduledoc """
  Polls the HTTP `/health` endpoint of each service and summarises the outcome
  as rows suitable for a data table.
  """

  # `{service name, port}` pairs checked when the caller passes no list.
  @default_services [
    {"user_svc", 8081},
    {"email_svc", 8083},
    {"image_svc", 8084},
    {"client_svc", 8085}
  ]

  @doc """
  Checks every service in `services` and returns one map per service.

  `services` is a list of `{name, port}` tuples and defaults to the four
  services of this project. Each returned map has the keys `:service`, `:port`,
  `:status` and `:url`, where `:status` is:

    * `"🟢 Healthy"` for an HTTP 200 response,
    * `"🟡 <code>"` for any other HTTP status,
    * `"🔴 Down"` when the request itself fails.

  Requests are made one after the other, without retries, and with a one-second
  connect timeout.
  """
  def check_all(services \\ @default_services) do
    Enum.map(services, &check/1)
  end

  # Performs the request for a single service and builds its table row.
  defp check({name, port}) do
    url = "http://host.docker.internal:#{port}/health"
    response = Req.get(url, retry: false, connect_options: [timeout: 1000])

    %{service: name, port: port, status: status_label(response), url: url}
  end

  # Maps a Req result to the label shown in the table.
  defp status_label({:ok, %{status: 200}}), do: "🟢 Healthy"
  defp status_label({:ok, %{status: status}}), do: "🟡 #{status}"
  defp status_label({:error, _reason}), do: "🔴 Down"
end
# Initial check
timestamp = DateTime.utc_now() |> DateTime.to_string()
frame = Kino.Frame.new() |> Kino.render()
health = HealthCheck.check_all()
table = Kino.DataTable.new(health)
Kino.Frame.append(frame, table)

# Refresh every 30 seconds, 10 times (roughly 5 minutes in total).
# Kino.listen/2 is used rather than Kino.animate/2: animate renders whatever the
# callback returns in a frame of its own, whereas this callback only needs the
# side effect of updating the table that is already on screen.
Stream.interval(30_000)
|> Stream.take(10)
|> Kino.listen(fn _tick ->
  Kino.DataTable.update(table, HealthCheck.check_all())
end)

# Keep the cell output limited to the frame rendered above.
Kino.nothing()
ERPC calls to check each connected service
defmodule NodeInfo do
  @moduledoc """
  Collects runtime information from remote nodes through `:erpc`.
  """

  # Upper bound, in milliseconds, for each remote call. `:erpc.call/4` waits
  # forever by default, which would freeze the dashboard on a hung node.
  @rpc_timeout 5_000

  @doc """
  Returns one row per node found in the values of `services_map`.

  Each row is a map with the keys `:node`, `:elixir`, `:otp`, `:uptime_hours`
  and `:status`. When a node cannot be reached, or a call to it times out, the
  row is still returned with `status: "🔴 Offline"` and `nil` for the other
  fields, so a single unavailable node no longer crashes the whole check.
  """
  def check(services_map) do
    services_map
    |> Map.values()
    |> Enum.map(&node_row/1)
  end

  # Builds the row for one node; erpc-level failures become an "Offline" row.
  defp node_row(node) do
    elixir_version = call(node, System, :version, [])
    otp_release = node |> call(:erlang, :system_info, [:otp_release]) |> to_string()
    # :wall_clock yields {total_ms, ms_since_last_call}; only the total is used.
    {wall_clock_ms, _since_last_call} = call(node, :erlang, :statistics, [:wall_clock])

    %{
      node: node,
      elixir: elixir_version,
      otp: otp_release,
      uptime_hours: Float.round(wall_clock_ms / 1000 / 60 / 60, 2),
      status: "🟢 Online"
    }
  catch
    # Raised by :erpc itself (e.g. :noconnection, :timeout), not by remote code.
    :error, {:erpc, _reason} ->
      %{node: node, elixir: nil, otp: nil, uptime_hours: nil, status: "🔴 Offline"}
  end

  # Single place where the timeout is applied to remote calls.
  defp call(node, module, function, args) do
    :erpc.call(node, module, function, args, @rpc_timeout)
  end
end
# Render the node-info table once inside a frame.
frame = Kino.Frame.new() |> Kino.render()
table = Kino.DataTable.new(NodeInfo.check(services_map))
Kino.Frame.append(frame, table)

# Refresh every 30 seconds, 10 times. Kino.listen/2 is used rather than
# Kino.animate/2: animate renders the callback's return value in an extra
# frame, while this callback only updates the table already on screen.
Stream.interval(30_000)
|> Stream.take(10)
|> Kino.listen(fn _tick ->
  Kino.DataTable.update(table, NodeInfo.check(services_map))
end)

# Keep the cell output limited to the frame rendered above.
Kino.nothing()
Email flow
# Node name of the client service; the calls below are sent to this node.
fqdn_client = services_map.client

# create.(i, type) invokes Email.create(i, type) on the client node via :erpc.
create = &:erpc.call(fqdn_client, Email, :create, [&1, &2])
A single call:
# Invoke Email.create(1, :notification) once on the client node.
create.(1, :notification)
Hammer the services via the Email flow
# Number of Email.create calls to issue.
up_to = 50

# Each call picks its email type at random.
random_create = fn i -> create.(i, Enum.random([:welcome, :notification])) end

# Run at most 10 calls at a time; result order is irrelevant and results are
# discarded.
1..up_to
|> Task.async_stream(random_create, max_concurrency: 10, ordered: false)
|> Stream.run()
# Disabled iframe embed, kept for reference. `iframe_html` is currently an
# empty string; the commented-out lines would render it as HTML in a frame.
# if = Kino.Frame.new()
iframe_html = """
"""
# Kino.Frame.render(if, Kino.HTML.new(iframe_html))
# if
Image flow
Create a small image:
# Generate a 1_000 x 1_000 image with the `xyz` operation, tag it with the LCH
# interpretation, and encode the result as a PNG binary.
{:ok, xyz_img} = Vix.Vips.Operation.xyz(1_000, 1_000)
{:ok, img} = Vix.Vips.Operation.colourspace(xyz_img, :VIPS_INTERPRETATION_LCH)
{:ok, bin_png} = Vix.Vips.Image.write_to_buffer(img, ".png")

# Last expression of the cell: the image itself is the cell output.
img
Test the image flow
# user_email.(i) builds the address passed along with the i-th conversion.
user_email = &"user_#{&1}@com"

# convert.(bin, email) invokes Image.convert_bin(bin, email) on the client node.
convert = &:erpc.call(fqdn_client, Image, :convert_bin, [&1, &2])

# Send the small PNG once.
convert.(bin_png, user_email.(1))
# Same recipe as the small image, at 5_000 x 5_000.
{:ok, big_xyz} = Vix.Vips.Operation.xyz(5_000, 5_000)
{:ok, big_img} = Vix.Vips.Operation.colourspace(big_xyz, :VIPS_INTERPRETATION_LCH)
{:ok, big_png} = Vix.Vips.Image.write_to_buffer(big_img, ".png")

# Show the decoded image info next to the encoded size in bytes.
{ExImageInfo.info(big_png), byte_size(big_png)}
# Helpers for the image flow: build a user address and call
# Image.convert_bin/2 on the client node.
user_email = fn i -> "user_#{i}@com" end

convert = fn bin, email ->
  :erpc.call(fqdn_client, Image, :convert_bin, [bin, email])
end

# Send the big PNG built just above. This call previously re-sent `bin_png`,
# leaving `big_png` unused.
convert.(big_png, user_email.(1))
Image Stress tests
Hammer the Image flow every 100 ms
Then check Jaeger and the Grafana dashboards
# Delay between two ticks, in milliseconds.
int = 100
# Number of conversions to trigger.
nb = 5

# One tick every `int` ms, `nb` ticks in total; the tick value is used as the
# user index.
ticks = Stream.interval(int) |> Stream.take(nb)

ticks
|> Task.async_stream(&convert.(bin_png, user_email.(&1)),
  max_concurrency: 10,
  ordered: false
)
|> Stream.run()
Hammer the image flow concurrently
Then check Jaeger and the Grafana dashboards
# Fire `nb` conversions with up to 10 in flight at once. The range is driven by
# `nb` directly: the previous `1..5 |> Stream.take(nb)` silently capped the run
# at 5 calls whatever `nb` was. The explicit `//1` step keeps the range empty
# when `nb` is 0.
1..nb//1
|> Task.async_stream(fn i -> convert.(bin_png, user_email.(i)) end,
  ordered: false,
  max_concurrency: 10
)
|> Stream.run()
Remote Service Inspector
# Select a node to inspect
options = [client: "Client", user: "User", image: "Image", email: "Email"]
node_selector = Kino.Input.select("Select Node:", options)
Kino.render(node_selector)
result_frame = Kino.Frame.new()

# Converts a byte count to megabytes, rounded to two decimals.
to_mb = fn bytes -> Float.round(bytes / 1024 / 1024, 2) end

# Builds the Markdown report for one node. Each remote call is bounded to five
# seconds (`:erpc.call/4` would wait forever); erpc failures propagate to the
# caller.
node_report = fn node ->
  process_count = :erpc.call(node, :erlang, :system_info, [:process_count], 5_000)
  memory = :erpc.call(node, :erlang, :memory, [], 5_000)

  """
  ### Node: #{node}
  - **Process Count:** #{process_count}
  - **Total Memory:** #{to_mb.(memory[:total])} MB
  - **Total Memory (bytes):** #{memory[:total]}
  - **Atom Memory:** #{to_mb.(memory[:atom])} MB
  - **Binary Memory:** #{to_mb.(memory[:binary])} MB
  - **ETS Memory:** #{to_mb.(memory[:ets])} MB
  """
end

# Re-render the report whenever the selection changes.
node_selector
|> Kino.Control.stream()
|> Kino.listen(fn evt ->
  case Map.get(services_map, evt.value) do
    nil ->
      # Selection without a matching node: leave the frame untouched.
      nil

    node ->
      info_md =
        try do
          node_report.(node)
        catch
          # Show the failure instead of leaving the previous node's data on
          # screen when the node is down or the call times out.
          :error, {:erpc, reason} ->
            "### Node: #{node}\n- **Unreachable:** `#{inspect(reason)}`\n"
        end

      Kino.Frame.render(result_frame, Kino.Markdown.new(info_md))
  end
end)

# Last expression of the cell: display the frame the listener renders into.
result_frame