Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Image caption

image_caption.livemd

Image caption

# Settings handed to the Instructor OpenAI adapter. `System.fetch_env!/1` raises
# when OPENAI_API_KEY is not set, so a missing key is reported before anything
# is installed.
openai_options = [
  api_key: System.fetch_env!("OPENAI_API_KEY"),
  http_options: [receive_timeout: 60_000, connect_options: [protocols: [:http2]]]
]

# Packages the notebook depends on.
dependencies = [
  {:instructor, "~> 0.0.5"},
  {:ecto, "~> 3.11"}
]

# Install the dependencies and register the Instructor application config.
Mix.install(dependencies,
  config: [
    instructor: [adapter: Instructor.Adapters.OpenAI, openai: openai_options]
  ]
)

Section

defmodule ContentHelpers do
  @moduledoc """
  Builds the chat messages used to ask a vision model for image descriptions:
  a system prompt, a user prompt carrying extra context, and an image content part.
  """

  # Detail levels accepted by `image_message/2`.
  @detail_levels ["low", "high"]

  # Media types keyed by lowercase file extension. Extensions that are not listed
  # fall back to @default_mime_type.
  @mime_types %{
    ".jpg" => "image/jpeg",
    ".jpeg" => "image/jpeg",
    ".png" => "image/png",
    ".gif" => "image/gif",
    ".webp" => "image/webp"
  }
  @default_mime_type "image/jpeg"

  @doc """
  Reads the file at `image_path` and returns an `image_url` content part that
  embeds the file as a base64 data URL.

  The media type in the data URL is chosen from the file extension
  (case-insensitive); unknown extensions are labelled `image/jpeg`.

  `detail` must be `"low"` (the default) or `"high"`; any other value raises
  `FunctionClauseError`. Raises `File.Error` when the file cannot be read.
  """
  def image_message(image_path, detail \\ "low")
      when is_binary(image_path) and detail in @detail_levels do
    image_b64 = image_path |> File.read!() |> Base.encode64()
    image_data = "data:#{mime_type(image_path)};base64," <> image_b64
    %{type: "image_url", image_url: %{url: image_data, detail: detail}}
  end

  @doc """
  Returns the system message that sets the model's role, the article context,
  and the expected format of the alt text and the figcaption.
  """
  def system_message do
    %{
      role: "system",
      content: """
        You are an expert at providing an image description for assistive technology and SEO benefits.
        
        The image is included in an online article titled "The Hidden Gems of Urban Street Art."
        
        The article aims to showcase the vibrant and often overlooked artworks that adorn
        the nooks and crannies around the city of Toronto Canada.
        
        You generate text for two purposes:
        - an HTML img alt text
        - an HTML figure, figcaption text
        
        ## Alt text format
        Briefly describe the contents of the image where the context is focusing on the urban street art.
        Be concise and limit the description to 125 characters or less.
        
        Example alt text:
        > A vibrant phoenix graffiti with blazing orange, red, and gold colors on the side of a brick building in an urban setting.
        
        ## figcaption format
        Image caption descriptions should focus on the urban artwork, providing a description of the appearance,
        style, street address if available, and how it relates to the surroundings. Be concise.
        
        Example caption text:
        > A vibrant phoenix graffiti on a brick building at Queen St W and Spadina Ave. With wings outstretched, the mural's blazing oranges, reds, and golds contrast sharply against the red brick backdrop. Passersby pause to observe, integrating the artwork into the urban landscape.
      """
    }
  end

  @doc """
  Returns the user message asking for the descriptions.

  `extra_image_info` is interpolated into the prompt as additional context
  about the image (for example where it was taken).
  """
  def user_message(extra_image_info) do
    %{
      role: "user",
      # The JSON keys below are named after the fields of the response schema
      # (`alt_tag`, `figcaption`) so the prompt and the schema agree.
      content: """
        Provide the descriptions for the image. Incorporate relevant information from the following additional details if applicable:

        #{extra_image_info}

        Output in the following JSON format:

        {
          "alt_tag": "generated alt text",
          "figcaption": "generated caption text"
        }
      """
    }
  end

  # Picks the data-URL media type from the path's extension.
  defp mime_type(image_path) do
    extension = image_path |> Path.extname() |> String.downcase()
    Map.get(@mime_types, extension, @default_mime_type)
  end
end

defmodule ImageCaptioner do
  # Response model for the captioning request: an embedded Ecto schema holding
  # the generated alt text and figure caption, with length limits checked in
  # `validate_changeset/1`.
  use Ecto.Schema
  use Instructor.Validator

  # Maximum lengths enforced by `validate_changeset/1`.
  @alt_tag_max_length 125
  @figcaption_max_length 250

  embedded_schema do
    field(:alt_tag, :string)
    field(:figcaption, :string)
  end

  # Adds the length limits to the changeset and returns it.
  #
  # `validate_length/3` takes `:max` for an upper bound. The
  # `:less_than_or_equal_to` option belongs to `validate_number/3` and is
  # ignored here, so using it would leave both limits unenforced. Each field
  # gets its own limit: 125 for the alt text, 250 for the figcaption.
  @impl true
  def validate_changeset(changeset) do
    changeset
    |> Ecto.Changeset.validate_length(:alt_tag, max: @alt_tag_max_length)
    |> Ecto.Changeset.validate_length(:figcaption, max: @figcaption_max_length)
  end
end

# Relative path of the photo to describe.
image_path = "./scott-webb-DcNlJK7kLkk-unsplash.jpg"

# Conversation sent to the model: the system prompt, the text request with
# location context, then a user message whose content is the image part.
messages = [
  ContentHelpers.system_message(),
  ContentHelpers.user_message("image of urban art mural on underpass at 507 King St E"),
  %{role: "user", content: [ContentHelpers.image_message(image_path)]}
]

# Kept as the last expression so its result is the value of this cell.
Instructor.chat_completion(
  model: "gpt-4o",
  response_model: ImageCaptioner,
  max_retries: 3,
  messages: messages
)

# {:ok,
#  %ImageCaptioner{
#    id: "1",
#    alt_tag: "A colorful mural of a face with large, vivid eyes and full lips on an underpass at 507 King St E.",
#    figcaption: "A colorful mural depicting a face with striking eyes and full lips, located on the underpass at 507 King St E. The vibrant greens, yellows, and purples of the artwork bring life to the concrete structure, with the facial features seamlessly integrated into the urban landscape. Pedestrians walk by, adding a sense of scale and immersion to the scene."
#  }}