Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Extract video frames with kino

extract-video-frames-with-kino.livemd

Extract video frames with kino

# Notebook dependencies:
#   ffmpex  - FFmpeg/FFprobe wrapper (metadata probing, single-frame extraction)
#   exile   - external-process streaming, used to pipe raw frames out of ffmpeg
#   kino    - Livebook widgets (Kino.JS / Kino.JS.Live)
#   decimal - exact arithmetic for the frame-interval / frame-count math
#   jason   - JSON encoding/decoding
# NOTE(review): exile is pinned to a git HEAD, not a version — builds may drift.
Mix.install([
  {:ffmpex, "~> 0.10"},
  {:exile, git: "https://github.com/akash-akya/exile"},
  {:kino, "~> 0.5"},
  {:decimal, "~> 2.0"},
  {:jason, "~> 1.3"}
])

# Publicly accessible sample video used throughout the notebook.
source = "https://storage.googleapis.com/momenti-staging-public-2/content/aespa.mp4"

defmodule Clip do
  # A request for a run of frames: start at `start_time` (seconds), take
  # `frame_count` frames from `source`, and broadcast each one to the widget
  # under `event`.
  # NOTE: the `source` default captures the notebook variable bound above —
  # Elixir module bodies can read bindings from the surrounding script scope.
  defstruct start_time: 0, frame_count: 0, source: source, event: "frame:clip"
end

defmodule Frame do
  # A single decoded video frame: `pts` is the presentation timestamp in
  # seconds, `data` holds the raw frame bytes.
  defstruct [pts: 0.0, data: ""]
end

Intro

Process FFmpeg result with stream 에서 구현한 스트림 처리 코드를 D3 로 구현한 Seekbar 와 연결시켜 보려고 합니다. Seekbar 는 Javascript 로 구현되어 있지만, Kino.JS Javascript API 를 사용해 Livebook 에서도 실행할 수 있습니다.

Modules

비디오 파일 처리를 담당하는 Video 모듈을 선언합니다.

  • Video.info/2: 비디오로 부터 duration, frame_rate, thumbnail 정보를 추출합니다.
  • Video.fetch_clip/2: 선택한 구간의 프레임들을 백그라운드 Task 로 추출해 widget 으로 전송합니다.
defmodule Video do
  @moduledoc """
  Video processing helpers for the notebook: probes metadata and thumbnails
  via FFprobe/FFmpex, and streams MJPEG frames out of ffmpeg (through Exile),
  broadcasting each frame to a Kino.JS.Live widget as a base64 data URL.
  """

  import FFmpex
  import Kino.JS.Live.Context, only: [broadcast_event: 3]
  use FFmpex.Options

  # JPEG markers: start-of-image (0xFFD8) and end-of-image (0xFFD9).
  # Only @eoi is used — to split ffmpeg's concatenated MJPEG byte stream
  # into individual JPEG images; @soi is kept for documentation.
  @soi <<255, 216>>
  @eoi <<255, 217>>

  @doc """
  Fetches the frames described by `clip` in a background task, broadcasting
  them through `kino_context`. Returns the `Task` so callers can cancel it.
  """
  def fetch_clip(%Clip{} = clip, kino_context) do
    Task.async(fn ->
      fetch_frames(clip, kino_context)
    end)
  end

  @doc """
  Probes `source` and returns `%{duration:, thumbnails:, frame_rate:, source:}`.

  NOTE: `frame_rate` here is actually the frame *interval* in seconds
  (denominator over numerator of ffprobe's `r_frame_rate`); the JS widget
  relies on that when computing its playback tick. `thumbnails` are
  `sample_count + 1` evenly spaced frames, each `%{ts:, dataURL:}`.
  """
  def info(source, sample_count \\ 20) do
    {:ok, streams} = FFprobe.streams(source)

    video =
      streams |> Enum.filter(fn %{"codec_type" => type} -> type === "video" end) |> List.first()

    {duration, _} = video |> Map.get("duration") |> Float.parse()
    {frame_count, _} = video |> Map.get("nb_frames") |> Integer.parse()

    # "r_frame_rate" is "num/den" frames per second; reversing the pair
    # before dividing yields den/num = seconds per frame.
    frame_rate =
      video
      |> Map.get("r_frame_rate")
      |> String.split("/")
      |> Enum.map(fn num -> Integer.parse(num) |> elem(0) end)
      |> Enum.reverse()
      |> then(fn args -> Kernel.apply(Decimal, :div, args) end)
      |> Decimal.to_float()

    # interval * (frames - 1) ≈ duration, so `step` spreads the sample
    # timestamps evenly across the whole video.
    step = frame_rate * (frame_count - 1) / sample_count
    timestamps = for i <- 0..sample_count, do: i * step
    chunk_size = div(sample_count, max(System.schedulers_online(), 2)) + 1

    # Extract thumbnails concurrently — one task per chunk of timestamps.
    thumbnails =
      timestamps
      |> Enum.chunk_every(chunk_size)
      |> Enum.map(fn chunks ->
        Task.async(fn ->
          Enum.map(chunks, fn timestamp ->
            fetch_frame(source, timestamp: timestamp, scale: 4)
          end)
        end)
      end)
      |> Task.await_many(:infinity)
      |> Enum.flat_map(& &1)

    %{duration: duration, thumbnails: thumbnails, frame_rate: frame_rate, source: source}
  end

  # Extracts a single MJPEG frame at `:timestamp` (seconds), downscaled by
  # `:scale` (default 2), returning `%{ts:, dataURL:}`.
  defp fetch_frame(source, opts) do
    timestamp = Keyword.get(opts, :timestamp)
    scale = Keyword.get(opts, :scale, 2)

    {:ok, image} =
      FFmpex.new_command()
      |> add_input_file(source)
      |> add_file_option(option_ss(timestamp))
      |> to_stdout()
      |> add_stream_specifier(stream_type: :video)
      |> add_stream_option(option_frames(1))
      |> add_stream_option(option_c("mjpeg"))
      |> add_stream_option(option_vf("scale=iw/#{scale}:ih/#{scale}"))
      |> add_stream_option(option_q("31"))
      |> add_stream_option(option_threads("auto"))
      |> add_file_option(option_f("image2pipe"))
      |> execute()

    %{ts: timestamp, dataURL: dataURL(image)}
  end

  # "frame:clip" protocol: announce the total frame count first ("init"),
  # then push each frame with its index so the client can track progress.
  defp fetch_frames(
         %Clip{
           start_time: start_time,
           frame_count: frame_count,
           source: source,
           event: event = "frame:clip"
         },
         ctx
       ) do
    broadcast_event(ctx, event, %{type: "init", data: frame_count})

    create_frame_stream({start_time, frame_count, source})
    |> Stream.scan(0, fn image, index ->
      broadcast_event(ctx, event, %{
        type: "image",
        data: %{index: index, image: dataURL(image)}
      })

      index + 1
    end)
    |> Stream.run()
  end

  # Any other event (e.g. "frame:position"): push bare frames, no indices.
  defp fetch_frames(
         %Clip{
           start_time: start_time,
           frame_count: frame_count,
           source: source,
           event: event
         },
         ctx
       ) do
    create_frame_stream({start_time, frame_count, source})
    |> Stream.each(fn image ->
      broadcast_event(ctx, event, %{
        data: dataURL(image)
      })
    end)
    |> Stream.run()
  end

  # Streams `frame_count` JPEG images starting at `start_time` by running
  # ffmpeg through Exile and splitting its stdout on the JPEG EOI marker.
  defp create_frame_stream({start_time, frame_count, source}) do
    Exile.stream!(
      ~w(ffmpeg -ss #{start_time} -i cache:#{source} -f image2pipe -vf scale=iw/2:ih/2 -c:v mjpeg -q:v 31 -frames:v #{frame_count} -threads auto pipe:1)
    )
    |> Stream.transform(<<>>, &split/2)
    |> Stream.reject(&is_nil/1)
  end

  # Splits an arbitrary chunk of the MJPEG byte stream into complete JPEG
  # images. `acc` buffers the bytes of a partially received image between
  # chunks; returns `{complete_images, new_acc}` as Stream.transform expects.
  defp split(part, acc) do
    case :binary.match(part, @eoi) do
      :nomatch ->
        # No end-of-image marker in this chunk — buffer it whole.
        {[], acc <> part}

      {pos, len} ->
        # Cut right after the EOI marker: the prefix completes the current
        # image; recurse on the remainder, which may hold further images.
        pos = pos + len
        <<image::binary-size(pos), rest::binary>> = part
        {images, new_acc} = split(rest, <<>>)
        {[acc <> image | images], new_acc}
    end
  end

  # Wraps a JPEG binary as a base64 data URL consumable by <img src=...>.
  defp dataURL(image) when is_binary(image) do
    "data:image/jpeg;base64,#{Base.encode64(image)}"
  end
end
# Probe the sample video: duration, frame interval and evenly spaced thumbnails.
data = Video.info(source)

비디오로 부터 가져올 프레임 구간을 선택하고, 확인할 수 있는 Kino widget Player 모듈을 선언합니다. D3 brush 를 사용해 비디오의 구간을 선택할 수 있는 Javascript widget 코드를 모듈에 선언합니다.

모듈에서는 Kino.JS.Live.handle_event/3, Kino.JS.Live.Context.broadcast_event/3 를 사용해 widget 으로 부터 전달받은 메시지들을 처리합니다.

Javascript widget 에서는 context.pushEvent, context.handleEvent 를 사용해 서버로 메시지를 보내거나 받을 수 있습니다.

defmodule Player do
  @moduledoc """
  Kino.JS.Live widget: a D3-based seekbar with video thumbnails, a brush for
  selecting a time range, and a draggable playhead. Frames for the current
  selection/position are fetched server-side by `Video` and pushed to the
  client as "frame:clip" / "frame:position" events.
  """

  use Kino.JS
  use Kino.JS.Live

  # Starts the widget with the map produced by `Video.info/2`
  # (duration, thumbnails, frame_rate, source).
  def new(data) do
    Kino.JS.Live.new(__MODULE__, data)
  end

  def init(data, ctx) do
    {:ok, assign(ctx, data)}
  end

  # Sends assigns to a connecting client; the running `:task` (if any) is
  # server-side state and not serializable, so it is dropped from the payload.
  def handle_connect(ctx) do
    {:ok, ctx.assigns |> Map.drop([:task]), ctx}
  end

  # Brush selection changed: `selection` is [start, end] in seconds.
  def handle_event("update_selection", selection, ctx) do
    [p1, p2] = selection

    # `frame_rate` is the frame interval in seconds (see Video.info/2), so
    # the selected span divided by it yields the frame count (minimum 1,
    # rounded via Decimal to avoid float artifacts).
    frame_count =
      ((p2 - p1) / ctx.assigns.frame_rate)
      |> Decimal.from_float()
      |> Decimal.max("1.0")
      |> Decimal.round()
      |> Decimal.to_integer()

    clip = %Clip{start_time: p1, frame_count: frame_count, source: ctx.assigns.source}

    # Cancel any fetch still running for a previous selection.
    if Map.has_key?(ctx.assigns, :task) do
      Task.shutdown(ctx.assigns.task, :brutal_kill)
    end

    task = Video.fetch_clip(clip, ctx)

    {:noreply, assign(ctx, selection: selection, task: task)}
  end

  # Playhead dragged: fetch a single frame at `position` (seconds), pushed
  # back under the "frame:position" event.
  def handle_event("update_position", position, ctx) do
    clip = %Clip{
      start_time: position,
      frame_count: 1,
      source: ctx.assigns.source,
      event: "frame:position"
    }

    Video.fetch_clip(clip, ctx)
    {:noreply, assign(ctx, position: position)}
  end

  # Swallow stray messages (e.g. replies/DOWNs from the fetch tasks).
  def handle_info(_msg, ctx) do
    {:noreply, ctx}
  end

  # Client-side widget (D3 + dayjs + lodash loaded from CDNs).
  # NOTE(review): inside play(), `range.attr('value')` reads the *initial*
  # attribute as a string, so `=== images.length - 1` never matches —
  # `+range.property('value')` looks intended; confirm before changing.
  asset "main.js" do
    """
    import * as _ from 'https://cdn.skypack.dev/lodash';
    import * as d3 from "https://cdn.skypack.dev/d3";
    import dayjs from "https://cdn.skypack.dev/dayjs@1.11.0";
    import duration from "https://cdn.skypack.dev/dayjs@1.11.0/plugin/duration";

    dayjs.extend(duration);

    export function init(ctx, {duration, thumbnails, frame_rate}) {
      const M = { top: 10, right: 20, bottom: 20, left: 20 };
      const W = ctx.root.clientWidth;
      const H = 100;

      const innerW = W - M.left - M.right;
      const innerH = H - M.top - M.bottom;

      const root = d3.select(ctx.root);

      const svg = root.append('svg')
        .attr('width', W)
        .attr('height', H);

      // preview
      let images = [];
      let timer = null;
      let direction = 1;

      const image = new Image();
      const tick = () => {
        const value = +range.property('value');
        const max = +range.property('max');
        if ((direction > 0 && value === max) || (direction < 0 && value === 0)) {
          direction = -direction
        }
        const nextValue = value + direction;
        range.property('value', nextValue);
        const data = images[nextValue];
        if (images.length !== 0 && data) {
          image.setAttribute('src', data.image);
        }
      };
      const stop = () => {
        button.text('play');
        if (timer) {
          clearInterval(timer);
          timer = null;
        }
      };
      const play = () => {
        button.text('pause');
        direction = range.attr('value') === images.length - 1 ? -1 : 1;
        timer = setInterval(tick, frame_rate * 1000);
      };

      const preview = root.append('div')
        .attr('id', 'preview')
        .style('margin', '10px 0 10px 20px');

      const form = preview.append('form')
        .style('display', 'flex')
        .style('align-items', 'center')
        .style('margin', '10px 0');
      const button = form.append('button')
        .style('width', '5em')
        .style('margin-right', '5px')
        .attr('disabled', true)
        .attr('type', 'button')
        .text('play')
        .on('click', () => {
          if (timer) {
            stop();
          } else {
            play();
          }
        });
      const range = form.append('input')
        .attr('type', 'range')
        .property('min', 0)
        .property('step', 1)
        .property('value', 0)
        .style('width', '300px')
        .on('input', e => {
          const data = images[e.target.value];
          if (images.length !== 0 && data) {
            image.setAttribute('src', data.image);
          }
        });
      const count = form.append('label')
        .style("font-family", "menlo,consolas")
        .style("font-size", "13px")
        .style('margin-left', '5px');
      const numer = count.append('span').text(0);
      count.append('span').text(' / ');
      const denom = count.append('span').text(0);

      const canvas = preview.append('canvas')
        .attr('width', 640)
        .attr('height', 360);
      const context = canvas.node().getContext('2d');
      image.addEventListener('load', () => context.drawImage(image, 0, 0, 640, 360));

      // events
      const pushSelection = _.debounce(data => {
        images = [];
        button.attr('disabled', true);
        stop();
        ctx.pushEvent("update_selection", data);
      }, 400);

      const pushPosition = _.debounce(position => {
        stop();
        ctx.pushEvent("update_position", position);
      }, 400);

      ctx.handleEvent("frame:clip", ({type, data}) => {
        if (type === 'init') {
          numer.text('0');
          denom.text(data);
          range.attr('value', 0).attr('max', data - 1);
        } else {
          numer.text(data.index + 1);
          if (data.index === 0) {
            image.setAttribute('src', data.image);
          } else if (data.index === +range.attr('max')) {
            button.attr('disabled', null);
          }
          images.push(data);
        }
      });

      ctx.handleEvent("frame:position", ({data}) => {
        image.setAttribute('src', data);
      });

      // axis
      const xThumbScale = d3
        .scaleBand()
        .domain(thumbnails)
        .range([M.left, W - M.right]);

      const xScale = d3
        .scaleLinear([0, duration], [M.left, W - M.right])
        .clamp(true);

      const xAxis = (g) =>
        g.attr("transform", `translate(0,${H - M.bottom})`)
          .call(d3.axisBottom(xScale));

      // mask
      const mask = svg.append("defs").append("mask").attr("id", "selection-mask");
      mask
        .append("rect")
        .attr("fill", "#444")
        .attr("x", M.left)
        .attr("y", M.top)
        .attr("width", innerW)
        .attr("height", innerH);
      mask
        .append("rect")
        .attr("fill", "#fff")
        .attr("class", "selection")
        .attr("height", innerH);

      // thumbnails
      svg
        .append("g")
        .attr("class", "thumbnails")
        .selectAll("image")
        .data(thumbnails)
        .join("image")
        .attr("x", (d) => xThumbScale(d))
        .attr("y", M.top)
        .attr("width", xThumbScale.bandwidth())
        .attr("height", H - M.bottom - M.top)
        .attr("href", (d) => d.dataURL);

      svg.append("g").call(xAxis);

      // brush
      const brush = d3
        .brushX()
        .handleSize(20)
        .extent([[M.left, M.top], [W - M.right, H - M.bottom]])
        .on("start brush end", ({ selection, type }) => {
          if (selection) {
            const [x0, x1] = selection;
            const [ix0, ix1] = selection.map(xScale.invert);
            const [fx0, fx1] = [ix0, ix1].map((d) =>
              dayjs.duration(d * 1000).format("mm:ss")
            );

            svg.select(".thumbnails").attr("mask", "url(#selection-mask)");
            svg
              .select("#selection-mask .selection")
              .attr("x", x0)
              .attr("width", x1 - x0);
            timeCode
              .selectAll(".handle")
              .attr("visibility", type === "brush" ? "visible" : "hidden")
              .attr("x", (d) => (d === "x0" ? x0 : x1))
              .text((d) => (d === "x0" ? fx0 : fx1));

            if (type === "end") {
              pushSelection([ix0, ix1]);
            }
          } else {
            timeCode.selectAll(".handle").attr("visibility", "hidden");
            svg.select(".thumbnails").attr("mask", null);
          }
        });

      const brushEl = svg.append("g");
      brushEl
        .call(brush)
        .call((selection) => {
          selection
            .select(".selection")
            .attr("fill-opacity", 0.1)
            .attr("rx", 8)
            .attr("stroke-width", 5)
            .attr("stroke", "#f7ce45");
          selection
            .selectAll(".handle")
            .attr("fill", "#f7ce45")
            .attr("fill-opacity", 0.5);
        })
        .on("dblclick", function () {
          const selection = d3.brushSelection(this) ? null : xScale.range();
          d3.select(this).call(brush.move, selection);
        });

        // progress
        const progress = svg
          .append("g")
          .append("rect")
          .style("cursor", "pointer")
          .attr("class", "progress")
          .attr("fill", "#e95252")
          .attr("rx", 2)
          .attr("width", 5)
          .attr("x", xScale(0))
          .attr("y", M.top)
          .attr("height", innerH)
          .call(
            d3
              .drag()
              .on("drag", (e) => {
                const [min, max] = xScale.range();
                if (e.x >= min && e.x <= max) {
                  const time = xScale.invert(e.x);
                  progress.attr("x", e.x);
                  timeCode
                    .select(".progress")
                    .attr("visibility", "visible")
                    .attr("x", e.x)
                    .text(dayjs.duration(time * 1000).format("mm:ss"));
                  pushPosition(time);
                }
              })
              .on("end", (e) => {
                timeCode.select(".progress").attr("visibility", "hidden");
              })
          );

      // time code
      const timeCode = svg.append("g");

      timeCode
        .append("text")
        .attr("class", "progress")
        .attr("y", 8)
        .attr("text-anchor", "middle")
        .attr("visibility", "hidden")
        .style("font-family", "menlo,consolas")
        .style("font-size", "11px");

      timeCode
        .selectAll("text.handle")
        .data(["x0", "x1"])
        .enter()
        .append("text")
        .attr("class", "handle")
        .attr("y", 8)
        .attr("text-anchor", "middle")
        .attr("visibility", "hidden")
        .style("font-family", "menlo,consolas")
        .style("font-size", "11px");
    }
    """
  end
end
# Render the player widget for the probed video.
selection_widget = Player.new(data)