Powered by AppSignal & Oban Pro

Parsers

talks/2026-01-21 - parsers/parser.livemd

Parsers

Mix.install([
  {:nimble_parsec, "~> 1.4"}
])

What is a parser?

Parsing, syntax analysis, or syntactic analysis is a process of analyzing a string of symbols, either in natural language, computer languages or data structures, conforming to the rules of a formal grammar by breaking it into parts. The term parsing comes from Latin pars (orationis), meaning part (of speech).

Source: https://en.wikipedia.org/wiki/Parsing

So a parser is basically something that transform data into a syntax that our program can understand, which is called Abstract Syntax Tree or AST:

graph LR
    Input --> p([Parser]) --> AST

For many things we rely on parsers to convert string or binary data into a data structure. Often we rely on parsers that already exist:

  • ISO Date/DateTime
  • JSON
  • XML
  • ….

But what if such a parser does not exist? For example for a basic arithmetic calculation, eg.

  • 1+1
  • 42-2
  • 23*2
  • 10/2

First, we have to think about the syntax of our language:

  • We have the following tokens: a positive <number> and <op>
  • The grammar is <number><operator><number>

We can formally specify this using EBNF (Extended Backus-Naur form):

calculation = number , operator , number ;
operator    = "+" | "-" | "*" | "/" ;
number      = digit , { digit } ;
digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;

Pure Elixir

In Elixir we can use pattern matching to implement the parser:

defmodule Calculation.Parser do
  @type op :: :+ | :- | :* | :/
  @type ast :: {integer(), op(), integer()}

  def parse(str) when is_binary(str) do
    with {:ok, left, rest}  <- parse_int(str),
         # {:ok, rest}        <- parse_ws(rest),
         {:ok, op, rest}    <- parse_op(rest),
         # {:ok, rest}        <- parse_ws(rest),
         {:ok, right, rest} <- parse_int(rest),
         {:ok, :eos}        <- parse_eos(rest) do
      {:ok, {op, left, right}}
    end
  end

  # This could be added to support whitespace: "1 + 1"
  # defp parse_ws(<<" ", rest::binary>>), do: parse_ws(rest)
  # defp parse_ws(<<rest::binary>>), do: {:ok, rest}
  
  # --- integer parsing ---

  defp parse_int(<<c, rest::binary>>) when c in ?0..?9 do
    {digits, rest} = take_digits(rest, <<c>>)
    {:ok, String.to_integer(digits), rest}
  end

  defp parse_int(_), do: {:error, :expected_number}

  defp take_digits(<<c, rest::binary>>, acc) when c in ?0..?9 do
    take_digits(rest, <<acc::binary, c>>)
  end

  defp take_digits(rest, acc), do: {acc, rest}

  # --- operator parsing ---

  defp parse_op(<<"+", rest::binary>>), do: {:ok, :+, rest}
  defp parse_op(<<"-", rest::binary>>), do: {:ok, :-, rest}
  defp parse_op(<<"*", rest::binary>>), do: {:ok, :*, rest}
  defp parse_op(<<"/", rest::binary>>), do: {:ok, :/, rest}
  defp parse_op(_), do: {:error, :expected_operator}

  # --- parse eof ---

  defp parse_eos(<<>>), do: {:ok, :eos}
  defp parse_eos(_), do: {:error, :expected_eos}
end

input = "1+1"
ast   = Calculation.Parser.parse(input)

IO.inspect(ast, label: input)
1+1: {:ok, {:+, 1, 1}}
{:ok, {:+, 1, 1}}
examples = [
  # valid
  "1+1",
  "42-2",
  "23*2",
  "10/2",
  
  # invalid
  "",
  "1 + 2",
  "1?2",
  "1+2 "
]

Enum.map(examples, &{&1, Calculation.Parser.parse(&1)})
[
  {"1+1", {:ok, {:+, 1, 1}}},
  {"42-2", {:ok, {:-, 42, 2}}},
  {"23*2", {:ok, {:*, 23, 2}}},
  {"10/2", {:ok, {:/, 10, 2}}},
  {"", {:error, :expected_number}},
  {"1 + 2", {:error, :expected_operator}},
  {"1?2", {:error, :expected_operator}},
  {"1+2 ", {:error, :expected_eos}}
]

We can then write an interpreter that can evaluate the calculation

defmodule Calculation.Interpreter do
  def eval({op, left, right}) when is_integer(left) and is_integer(right) do
    do_op(op, left, right)
  end

  def eval(_), do: {:error, :invalid_input}

  defp do_op(:+, a, b), do: {:ok, a + b}
  defp do_op(:-, a, b), do: {:ok, a - b}
  defp do_op(:*, a, b), do: {:ok, a * b}
  defp do_op(:/, a, b), do: {:ok, div(a, b)}
  defp do_op(_, _, _), do: {:ok, :invalid_op}
end

input = "1+1"
{:ok, ast} = Calculation.Parser.parse(input)
{:ok, res} = Calculation.Interpreter.eval(ast)

IO.inspect(input, label: "Input")
IO.inspect(ast, label: "AST")
IO.inspect(res, label: "Result")
Input: "1+1"
AST: {:+, 1, 1}
Result: 2
2
Enum.map(examples, fn input ->
  res = with {:ok, ast} <- Calculation.Parser.parse(input),
          do: Calculation.Interpreter.eval(ast)

  {input, res}
end)
[
  {"1+1", {:ok, 2}},
  {"42-2", {:ok, 40}},
  {"23*2", {:ok, 46}},
  {"10/2", {:ok, 5}},
  {"", {:error, :expected_number}},
  {"1 + 2", {:error, :expected_operator}},
  {"1?2", {:error, :expected_operator}},
  {"1+2 ", {:error, :expected_eos}}
]

NimbleParsec

Fortunately, there is the nimble_parsec library that simplifies generating parsers in Elixir:

defmodule Calculation.NimbleParser.Grammar do
  @moduledoc """
  Helper function that define the grammar of our parser. 
  
  NimbleParsec recommends creating a separate module, but this could be 
  in the same module as well.
  """
  
  import NimbleParsec

  # label is used in error messages
  def number(), do: integer(min: 1) |> label("positiv number")
  
  def operator() do
    choice([
      replace(string("+"), :+),
      replace(string("-"), :-),
      replace(string("*"), :*),
      replace(string("/"), :/)
    ]) 
    |> label("operator (+, -, *, /)")
  end

  def calculation() do
    number()
    |> concat(operator())
    |> concat(number())
    |> eos()
    |> reduce({__MODULE__, :to_ast, []})
  end

  def to_ast([left, op, right]), do: {op, left, right}  
end

defmodule Calculation.NimbleParser do
  import NimbleParsec
  import Calculation.NimbleParser.Grammar

  @type op :: :+ | :- | :* | :/
  @type ast :: {op(), integer(), integer()}

  # this defines a private function that parses the calculation
  # you can set debug: true to see how the parser is implemented
  defparsecp :parse_calculation, calculation(), debug: false

  @doc "Public API to match our first parser"
  @spec parse(input :: binary()) :: {:ok, ast()} | {:error, binary()}
  def parse(input) do
    case parse_calculation(input) do
      {:ok, [calc], _rest, _ctx, _line, _offset} -> {:ok, calc}
      {:error, error, _rest, _ctx, _line, _offset} -> {:error, error}
    end
  end
end

Calculation.NimbleParser.parse("1+1")
{:ok, {:+, 1, 1}}
Enum.map(examples, &{&1, Calculation.NimbleParser.parse(&1)})
[
  {"1+1", {:ok, {:+, 1, 1}}},
  {"42-2", {:ok, {:-, 42, 2}}},
  {"23*2", {:ok, {:*, 23, 2}}},
  {"10/2", {:ok, {:/, 10, 2}}},
  {"", {:error, "expected positiv number"}},
  {"1 + 2", {:error, "expected operator (+, -, *, /)"}},
  {"1?2", {:error, "expected operator (+, -, *, /)"}},
  {"1+2 ", {:error, "expected end of string"}}
]