Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Examples

notebooks/examples.livemd

Examples

Mix.install([
  {:unpickler, "~> 0.1.0"},
  {:nx, "~> 0.4.0"}
])
:ok

Setup

We will first define a helper macro to automatically generate a pickle binary from example Python code.

defmodule Pickle do
  @moduledoc """
  Notebook helper that produces pickle binaries at compile time by
  running snippets of Python code.
  """

  @doc """
  Runs the given Python code and returns the pickle binary.

  The code must define a variable `x` and that value is pickled.

  Optionally, a numeric sigil modifier may be given indicating
  the pickle version to use, defaults to version 4. The modifier
  must be a single digit between `0` and `5`, otherwise an
  `ArgumentError` is raised.

  Note that a `python` (or `python3`) executable must be available.
  The Python code is executed while the macro is expanded, so a
  failure of the Python process raises at compile time.

  ## Examples

      iex> import Pickle
      iex> ~P'''
      ...> x = 1
      ...> '''
      <<128, 4, 75, 1, 46>>

  """
  defmacro sigil_P({:<<>>, _meta, [code]}, modifiers) do
    protocol = protocol!(modifiers)

    script = """
    import pickle
    import base64

    #{code}

    data = pickle.dumps(x, protocol=#{protocol})
    encoded = base64.b32encode(data).decode("utf-8")
    print(encoded, end="")
    """

    # The pickle bytes travel through stdout as Base32 text, so that
    # arbitrary binary data survives being printed by Python.
    case System.cmd(python!(), ["-c", script]) do
      {encoded, 0} ->
        # A binary is a literal in the Elixir AST, so it can be returned
        # from the macro directly.
        Base.decode32!(encoded)

      {output, status} ->
        raise "python exited with status #{status} while generating the pickle, " <>
                "stdout: #{inspect(output)}"
    end
  end

  # Translates the sigil modifiers (a charlist) into a pickle protocol number.
  defp protocol!([]), do: 4
  defp protocol!([digit]) when digit in ?0..?5, do: digit - ?0

  defp protocol!(modifiers) do
    raise ArgumentError,
          "expected the ~P modifier to be a single pickle protocol digit between 0 and 5, " <>
            "got: #{inspect(List.to_string(modifiers))}"
  end

  # Locates the Python interpreter, preferring `python` and falling back
  # to `python3` for systems that do not ship the unversioned name.
  defp python! do
    System.find_executable("python") || System.find_executable("python3") ||
      raise "could not find a `python` or `python3` executable in PATH"
  end
end
1 doctest, 0 failures
{:module, Pickle, <<70, 79, 82, 49, 0, 0, 11, ...>>, {:sigil_P, 2}}
import Pickle
Pickle

Basic data structures

~P"""
x = 1
"""
|> Unpickler.load!()
{1, ""}
~P"""
x = [1, 2, 3]
"""
|> Unpickler.load!()
{[1, 2, 3], ""}
~P"""
x = [1, 2.0, "text", (None, True), {"key": "val"}, b"\x01\x00"]
"""
|> Unpickler.load!()
{[1, 2.0, "text", {nil, true}, %{"key" => "val"}, <<1, 0>>], ""}

Custom object resolver

All non-trivial objects are deserialized as %Unpickler.Object{} structs. For example, let’s consider a date object:

~P"""
from datetime import date
x = date.fromisoformat("2022-05-17")
"""
|> Unpickler.load!()
{%Unpickler.Object{
   constructor: "datetime.date",
   args: [<<7, 230, 5, 17>>],
   kwargs: %{},
   state: nil,
   append_items: [],
   set_items: []
 }, ""}

However, ideally we would want to load that object as an Elixir %Date{}. To do that, we can specify an object resolver:

object_resolver = fn
  # Python pickles a date as `datetime.date` called with a single 4-byte
  # binary: the year as two bytes (high byte first), then month and day.
  # See https://github.com/python/cpython/blob/3.10/Lib/datetime.py#L1094-L1105
  %Unpickler.Object{constructor: "datetime.date", args: [<<year_hi, year_lo, month, day>>]} ->
    {:ok, date} = Date.new(year_hi * 256 + year_lo, month, day)
    {:ok, date}

  # Any other object is not handled by this resolver.
  _ ->
    :error
end

~P"""
from datetime import date
x = date.fromisoformat("2022-05-17")
"""
|> Unpickler.load!(object_resolver: object_resolver)
{~D[2022-05-17], ""}

Note that %Unpickler.Object{} has a number of different fields. That’s because Python objects may define multiple ways of serializing themselves, and we gather all the information under a single unified struct. This way we can easily pattern match on it in the object resolver.

When deserializing complex objects, you generally need to look at the corresponding Python source code and reverse-engineer how to load them accordingly. Here are a couple of examples:

~P"""
class Point:
  def __init__(self, x, y):
    self.x = x
    self.y = y

x = Point(1, 1)
"""
|> Unpickler.load!()
{%Unpickler.Object{
   constructor: "__main__.Point.__new__",
   args: [%Unpickler.Global{scope: "__main__", name: "Point"}],
   kwargs: %{},
   state: %{"x" => 1, "y" => 1},
   append_items: [],
   set_items: []
 }, ""}
~P"""
class Point:
  def __init__(self, x, y):
    self.x = x
    self.y = y

  def __reduce__(self):
    return (Point, (self.x, self.y))

x = Point(1, 1)
"""
|> Unpickler.load!()
{%Unpickler.Object{
   constructor: "__main__.Point",
   args: [1, 1],
   kwargs: %{},
   state: nil,
   append_items: [],
   set_items: []
 }, ""}
~P"""
class Point:
  def __init__(self, x, y):
    self.x = x
    self.y = y

  @classmethod
  def _reconstruct(cls, x, y):
    return cls(x, y)

  def __reduce__(self):
    return (Point._reconstruct, (self.x, self.y))

x = Point(1, 1)
"""
|> Unpickler.load!()
{%Unpickler.Object{
   constructor: "__main__.Point._reconstruct",
   args: [1, 1],
   kwargs: %{},
   state: nil,
   append_items: [],
   set_items: []
 }, ""}
~P"""
class Point:
  def __init__(self, x, y):
    self.x = x
    self.y = y

  def __getstate__(self):
    return (self.x, self.y)

  def __setstate__(self, state):
    (x, y) = state
    self.x = x
    self.y = y

x = Point(1, 1)
"""
|> Unpickler.load!()
{%Unpickler.Object{
   constructor: "__main__.Point.__new__",
   args: [%Unpickler.Global{scope: "__main__", name: "Point"}],
   kwargs: %{},
   state: {1, 1},
   append_items: [],
   set_items: []
 }, ""}
~P"""
class Point:
  def __init__(self, x, y):
    self.x = x
    self.y = y

  def __reduce__(self):
    append_iter = iter([1, 2])
    set_iter = iter([("a", 1), ("b", 2)])
    return (Point, (self.x, self.y), None, append_iter, set_iter)

x = Point(1, 1)
"""
|> Unpickler.load!()
{%Unpickler.Object{
   constructor: "__main__.Point",
   args: [1, 1],
   kwargs: %{},
   state: nil,
   append_items: [1, 2],
   set_items: [{"a", 1}, {"b", 2}]
 }, ""}