Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

MET-1246 data validation

livebooks/MET-1246-data-validation.livemd

MET-1246 data validation

# Notebook dependencies: Nx/EXLA for tensor computation, Explorer for
# dataframes, Scholar (tracked from git, no release pinned) for ML utilities.
Mix.install([
  {:nx, "~> 0.5"},
  {:exla, "~> 0.5.1"},
  {:explorer, "~> 0.5"},
  {:scholar, git: "https://github.com/elixir-nx/scholar"}
])

# Route all Nx operations through the EXLA (XLA-compiled) backend, then show
# how many BEAM schedulers are online as a quick view of available parallelism.
Nx.global_default_backend(EXLA.Backend)
:erlang.system_info(:schedulers_online)

Data loading and validation

# Deserialize the row and column index lists that were written with
# :erlang.term_to_binary, then build lookup maps from them.
base = "/data/livebooks/data/bulk"

read_term = fn name ->
  base
  |> Path.join(name)
  |> File.read!()
  # NOTE(review): binary_to_term on untrusted input is unsafe; these files are
  # locally generated notebook artifacts, so plain deserialization is fine here.
  |> :erlang.binary_to_term()
end

rows = read_term.("rows.bin")
cols = read_term.("columns.bin")
col_count = Enum.count(cols)
col_max = col_count - 1

# Presumably lists of {key, index} pairs — TODO confirm against the writer side.
row_map = Map.new(rows)
col_map = Map.new(cols)
tensor_file = Path.join(base, "matrix.bin")

# Load the flat f32 matrix dump and reshape it into {row_count, col_count}.
tensor =
  tensor_file
  |> File.read!()
  |> Nx.from_binary(:f32)

{n} = Nx.shape(tensor)
# Use a distinct name instead of rebinding `rows` (which holds the
# deserialized row list above) — the shadowing was easy to misread.
row_count = div(n, col_count)
tensor = Nx.reshape(tensor, {row_count, col_count})

# Build the CSV paths from `base` instead of repeating the directory literal,
# and swap only the file extension — String.replace/3 would have rewritten any
# ".csv" substring anywhere in the path.
input = Path.join(base, "telemetry_added.csv")
sorted = Path.rootname(input, ".csv") <> ".srt"

# Lazily stream a telemetry CSV, yielding one parsed tuple per line.
#
# Each line is expected as "<unix_seconds>,<id>,<check>,<instance>,<value>".
# Returns a stream of {dt, id, check, instance, value} where dt is an integer
# timestamp and value a float.
do_stream_csv = fn filename ->
  filename
  |> File.stream!()
  |> Stream.map(fn line ->
    # Trim the trailing newline (and CR on CRLF files) before splitting —
    # otherwise the last field carries "\n"/"\r\n" into Float.parse and any
    # string field in last position would keep the stray line ending.
    [dtstring, id, check, instance, valuestring] =
      line
      |> String.trim_trailing()
      |> String.split(",")

    {dt, _} = Integer.parse(dtstring)
    {value, _} = Float.parse(valuestring)
    {dt, id, check, instance, value}
  end)
end
# Smoke-test the stream: take the first one-minute chunk of the sorted CSV.
# chunk_by on div(dt, 60) groups consecutive rows falling in the same minute,
# which only works because the .srt input is sorted by timestamp.
do_stream_csv.(sorted)
|> Stream.chunk_by(fn {dt, _, _, _, _} -> div(dt, 60) end)
|> Stream.take(1)
|> Enum.to_list()
# Spot-check one matrix cell against a hand-computed mean of raw samples.
col = Map.get(col_map, "SHARED_gmaps_GetDirections")
row = Map.get(row_map, div(1_624_662_679, 60))
# NOTE(review): this slices row 0, not `row`, even though `row` was just
# looked up above — looks like it should be [row, col]; confirm intent.
val = Nx.slice(tensor, [0, col], [1, 1])
# Last element is the expected mean of the three raw samples for that minute.
{col, row, val, (1347.0 + 1218.0 + 1341.0) / 3.0}
# Trivial Nx.mean call as a quick backend sanity check.
Nx.mean(Nx.tensor([0.0, 0.0]))